2015-08-24 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / tree-loop-distribution.c
blob213793e001c82e77880400830709d0b0aaf4fdc7
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "alias.h"
48 #include "backend.h"
49 #include "cfghooks.h"
50 #include "tree.h"
51 #include "gimple.h"
52 #include "hard-reg-set.h"
53 #include "ssa.h"
54 #include "options.h"
55 #include "fold-const.h"
56 #include "cfganal.h"
57 #include "internal-fn.h"
58 #include "gimple-iterator.h"
59 #include "gimplify-me.h"
60 #include "stor-layout.h"
61 #include "tree-cfg.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "tree-ssa-loop.h"
64 #include "tree-into-ssa.h"
65 #include "tree-ssa.h"
66 #include "cfgloop.h"
67 #include "tree-chrec.h"
68 #include "tree-data-ref.h"
69 #include "tree-scalar-evolution.h"
70 #include "tree-pass.h"
71 #include "gimple-pretty-print.h"
72 #include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload hanging off a graph vertex V,
   and convenience forms taking the graph RDG and a vertex index I.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  The values double as one-character tags
   usable when dumping edges.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Accessor for the rdg_edge payload hanging off a graph edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE: memory read/write flags, predecessor
   and successor vertex numbers, then the statement itself.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
156 /* Dump the reduced dependence graph RDG to FILE. */
158 static void
159 dump_rdg (FILE *file, struct graph *rdg)
161 fprintf (file, "(rdg\n");
162 for (int i = 0; i < rdg->n_vertices; i++)
163 dump_rdg_vertex (file, rdg, i);
164 fprintf (file, ")\n");
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
/* Dump RDG to FILE in graphviz "dot" syntax.  Vertices are labeled
   with their statement; memory reads are filled green, memory writes
   red.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Route the pretty-printer output straight to FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: print the edge
		 without any label to keep the graph readable.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): closing the underlying fd before pclose signals EOF
     to dot so it renders; pclose then reaps the child — confirm this
     is intentional, as pclose's own fclose will see a closed fd.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG, or -1 when STMT is not part of
   the RDG.  The mapping is stored in the statement's uid (set up by
   create_rdg_vertices).  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  Uses outside the RDG (index -1) are
   skipped.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.
   For each edge BB is control dependent on, an edge is added from the
   vertex of the controlling statement (the last statement of the
   edge's source block, if it is a control statement) to V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* The controlling statement may not be part of the RDG.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
/* Creates the scalar flow (def-use) edges of the reduced dependence
   graph RDG: one edge from each SSA definition to each of its uses
   inside the RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence
   graph RDG using the control dependence info CD.  For a PHI the
   control dependences of all incoming edges' source blocks matter,
   as the PHI merges values from those paths.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  STMTS provides the statements in the order vertices
   are created; each statement's uid is set to its vertex index.  Data
   references found in the statements are appended to DATAREFS and also
   recorded per vertex.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs have no memory accesses to analyze.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this statement's data-refs start in DATAREFS so
	 only the newly found ones are attached to the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  Virtual PHIs, labels and
   debug statements are skipped.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG: edge payloads, per-vertex
   payloads (resetting each statement's uid back to -1 and releasing
   its data-references), then the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  When CD is non-NULL also add control dependence
   edges.  Returns NULL when data references could not be computed.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
/* Kind of code a partition is turned into: the original loop
   statements, or a call to a library builtin.  */

enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition: the set of statements (and the loops containing them)
   that will be distributed into one new loop or builtin call.  */

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  /* True when a statement computes a value live after the loop; such
     partitions are kept with the last (non-copied) loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Number of latch executions, used to size the builtin call.  */
  tree niter;
  /* True when the store also executes on the exit iteration, so the
     builtin must cover NITER + 1 elements.  */
  bool plus_one;
} *partition_t;
491 /* Allocate and initialize a partition from BITMAP. */
493 static partition_t
494 partition_alloc (bitmap stmts, bitmap loops)
496 partition_t partition = XCNEW (struct partition_s);
497 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
498 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
499 partition->reduction_p = false;
500 partition->kind = PKIND_NORMAL;
501 return partition;
/* Free PARTITION, including its statement and loop bitmaps.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
514 /* Returns true if the partition can be generated as a builtin. */
516 static bool
517 partition_builtin_p (partition_t partition)
519 return partition->kind != PKIND_NORMAL;
/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  The merged partition can
   no longer be a builtin, and it is a reduction if either input
   was.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  Debug uses are ignored.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  For a PHI only its result is checked; otherwise every
   SSA definition of STMT is.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP (on its preheader
   edge).  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
598 /* Creates an empty basic block after LOOP. */
600 static void
601 create_bb_after_loop (struct loop *loop)
603 edge exit = single_exit (loop);
605 if (!exit)
606 return;
608 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: sever debug uses of the statements that are about to
     be removed, so debug info does not reference dead SSA names.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove the statements.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
709 /* Build the size argument for a memory operation call. */
711 static tree
712 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
713 bool plus_one)
715 tree size = fold_convert_loc (loc, sizetype, nb_iter);
716 if (plus_one)
717 size = size_binop (PLUS_EXPR, size, size_one_node);
718 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
719 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
720 size = fold_convert_loc (loc, size_type_node, size);
721 return size;
/* Build an address argument for a memory operation call: the base
   address of DR plus its offset and init.  For a negative-stride
   reference the accessed region ends at the DR's address, so back up
   by NB_BYTES (less one element) to its start.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  /* Cheap checks first: zero constants and empty constructors are
     all-zero without encoding anything.  */
  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  /* The byte-wise reasoning below only holds for 8-bit bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP, replacing the
   distributed store loop.  The size, address and value arguments are
   derived from the partition's main data-reference.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int argument; widen the value to it.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP, replacing the
   distributed copy loop.  memmove is used instead when the source and
   destination may alias.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP: redirect its single exit to come
   straight from the preheader's source block, delete all its basic
   blocks, and fix up dominators.  Assumes LOOP has a single exit.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION: either a (copied) loop for normal
   partitions or a builtin call for memset/memcpy ones.  COPY_P is
   true when LOOP must be preserved for later partitions.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.
   The statements are found by a DFS over the (dependence) successors
   of V.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP: marks reduction partitions, and detects partitions that
   can become a memset or a memcpy/memmove call.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* When the store's block dominates the exit source, the store also
     executes on the final (exit) iteration: one extra element.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      /* The stored value must be expressible as a single repeated
	 byte, or be a byte-sized value usable as the memset char.  */
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* A non-invariant value defined inside the loop cannot be
	 hoisted to the memset call.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      /* The store must cover contiguous memory and execute on every
	 iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both references must be contiguous with equal strides, and the
	 copy must execute on every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the store of a
		 later iteration overwrites a yet-to-be-read element:
		 not even memmove-safe.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1152 /* For a data reference REF, return the declaration of its base
1153 address or NULL_TREE if the base is not determined. */
1155 static tree
1156 ref_base_address (data_reference_p dr)
1158 tree base_address = DR_BASE_ADDRESS (dr);
1159 if (base_address
1160 && TREE_CODE (base_address) == ADDR_EXPR)
1161 return TREE_OPERAND (base_address, 0);
1163 return base_address;
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG: either they share a memory-touching statement, or
   a data-reference in one has the same base address as one in the
   other.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  For each statement in
   STARTING_STMTS, the partition rooted at its RDG vertex is built and
   pushed, unless the vertex was already swallowed by an earlier
   partition.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  /* Tracks RDG vertices already assigned to some partition.  */
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      /* Ownership of PARTITION transfers to the caller via
	 PARTITIONS.  */
      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
1250 /* Dump to FILE the PARTITIONS. */
1252 static void
1253 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1255 int i;
1256 partition_t partition;
1258 FOR_EACH_VEC_ELT (partitions, i, partition)
1259 debug_bitmap_file (file, partition->stmts);
/* Debug PARTITIONS.  Convenience wrapper around dump_rdg_partitions
   writing to stderr, intended to be called from a debugger.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1271 /* Returns the number of read and write operations in the RDG. */
1273 static int
1274 number_of_rw_in_rdg (struct graph *rdg)
1276 int i, res = 0;
1278 for (i = 0; i < rdg->n_vertices; i++)
1280 if (RDG_MEM_WRITE_STMT (rdg, i))
1281 ++res;
1283 if (RDG_MEM_READS_STMT (rdg, i))
1284 ++res;
1287 return res;
1290 /* Returns the number of read and write operations in a PARTITION of
1291 the RDG. */
1293 static int
1294 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1296 int res = 0;
1297 unsigned i;
1298 bitmap_iterator ii;
1300 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1302 if (RDG_MEM_WRITE_STMT (rdg, i))
1303 ++res;
1305 if (RDG_MEM_READS_STMT (rdg, i))
1306 ++res;
1309 return res;
1312 /* Returns true when one of the PARTITIONS contains all the read or
1313 write operations of RDG. */
1315 static bool
1316 partition_contains_all_rw (struct graph *rdg,
1317 vec<partition_t> partitions)
1319 int i;
1320 partition_t partition;
1321 int nrw = number_of_rw_in_rdg (rdg);
1323 FOR_EACH_VEC_ELT (partitions, i, partition)
1324 if (nrw == number_of_rw_in_partition (rdg, partition))
1325 return true;
1327 return false;
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR encodes
   the dependence direction between the two partitions: 0 is no
   dependence, -1 is back, 1 is forth, 2 is both (callers can stop
   querying then, merging will occur).  LOOPS is the loop nest the
   dependence analysis runs in.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  The sign of
	   THIS_DIR tracks whether the pair was swapped.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Unknown dependence: be conservative, treat as both ways.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* Proved independent: this pair constrains nothing.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions across pairs: dependence both ways.  */
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
1393 /* Compare postorder number of the partition graph vertices V1 and V2. */
1395 static int
1396 pgcmp (const void *v1_, const void *v2_)
1398 const vertex *v1 = (const vertex *)v1_;
1399 const vertex *v2 = (const vertex *)v2_;
1400 return v2->post - v1->post;
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   CD carries the control dependences used to build the RDG; NB_CALLS
   receives the number of builtin (memset/memcpy) calls generated.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify each partition (possibly recognizing memset/memcpy
     patterns) and remember whether any builtin was detected.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    /* Compensate for the removal so iteration revisits slot I.  */
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  One vertex per remaining
     partition; edges follow dependence direction so a topological
     order of the graph is a valid emission order.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  /* Collect the partition's data references split into reads
	     and writes for dependence querying below.  */
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    /* Check write-after-read, read-after-write and
	       write-after-write dependences between the two
	       partitions; stop early once both directions are set.  */
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  Each
	 strongly connected component collapses into its first
	 partition.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Distribution pays off only with at least two resulting loops or
     at least one recognized builtin, and not when one partition
     would still perform all memory operations anyway.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

/* Pass descriptor for the loop distribution pass ("ldist"), which
   runs on GIMPLE in loop-closed SSA form.  */

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when either full distribution (-ftree-loop-distribution) or
     pattern distribution (-ftree-loop-distribute-patterns) is
     enabled.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
/* Main entry of the pass: walk all innermost loops of FUN, seed a
   worklist of interesting statements per loop, and try to distribute
   each loop.  Always returns 0; on change, SSA form and scalar
   evolutions are refreshed.  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  /* Control dependences are computed lazily, once, on first use.  */
  control_dependences *cd = NULL;

  /* Reset statement UIDs; the RDG construction assigns its own.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      /* Post-dominators are only needed transiently to build
		 the control dependence info.  */
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1831 } // anon namespace
/* Factory for the loop distribution pass; returns a newly allocated
   pass object registered with context CTXT.  */

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}