/* 2014-10-31  Richard Biener  <rguenther@suse.de>
   [official-gcc.git] / gcc / tree-loop-distribution.c
   blob b5a34caef41292a0670852922eb35373e63d9e7a  */
1 /* Loop distribution.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "predict.h"
49 #include "vec.h"
50 #include "hashtab.h"
51 #include "hash-set.h"
52 #include "machmode.h"
53 #include "tm.h"
54 #include "hard-reg-set.h"
55 #include "input.h"
56 #include "function.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfganal.h"
60 #include "basic-block.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimple-iterator.h"
67 #include "gimplify-me.h"
68 #include "stor-layout.h"
69 #include "gimple-ssa.h"
70 #include "tree-cfg.h"
71 #include "tree-phinodes.h"
72 #include "ssa-iterators.h"
73 #include "stringpool.h"
74 #include "tree-ssanames.h"
75 #include "tree-ssa-loop-manip.h"
76 #include "tree-ssa-loop.h"
77 #include "tree-into-ssa.h"
78 #include "tree-ssa.h"
79 #include "cfgloop.h"
80 #include "tree-chrec.h"
81 #include "tree-data-ref.h"
82 #include "tree-scalar-evolution.h"
83 #include "tree-pass.h"
84 #include "gimple-pretty-print.h"
85 #include "tree-vectorizer.h"
88 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;
/* Accessors for the rdg_vertex payload hanging off a graph vertex V,
   or off vertex number I of graph RDG.  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Access the dependence type of RDG edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
134 /* Dump vertex I in RDG to FILE. */
136 static void
137 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
139 struct vertex *v = &(rdg->vertices[i]);
140 struct graph_edge *e;
142 fprintf (file, "(vertex %d: (%s%s) (in:", i,
143 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
144 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
146 if (v->pred)
147 for (e = v->pred; e; e = e->pred_next)
148 fprintf (file, " %d", e->src);
150 fprintf (file, ") (out:");
152 if (v->succ)
153 for (e = v->succ; e; e = e->succ_next)
154 fprintf (file, " %d", e->dest);
156 fprintf (file, ")\n");
157 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
158 fprintf (file, ")\n");
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  /* Dump every vertex in index order.  */
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}
/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
/* Dump RDG to FILE in graphviz "dot" format.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty-printer emit directly to FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      /* Node label: vertex number plus the statement text.  */
      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: print them as
                 plain, unlabeled edges.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}
237 /* Display the Reduced Dependence Graph using dotty. */
239 DEBUG_FUNCTION void
240 dot_rdg (struct graph *rdg)
242 /* When debugging, you may want to enable the following code. */
243 #ifdef HAVE_POPEN
244 FILE *file = popen ("dot -Tx11", "w");
245 if (!file)
246 return;
247 dot_rdg_1 (file, rdg);
248 fflush (file);
249 close (fileno (file));
250 pclose (file);
251 #else
252 dot_rdg_1 (stderr, rdg);
253 #endif
/* Returns the index of STMT in RDG, or -1 if STMT is not part of it.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  /* The vertex index is cached in the stmt uid (set up by
     create_rdg_vertices; cleared back to -1 by free_rdg).  */
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
266 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
267 the index of DEF in RDG. */
269 static void
270 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
272 use_operand_p imm_use_p;
273 imm_use_iterator iterator;
275 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
277 struct graph_edge *e;
278 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
280 if (use < 0)
281 continue;
283 e = add_edge (rdg, idef, use);
284 e->data = XNEW (struct rdg_edge);
285 RDGE_TYPE (e) = flow_dd;
289 /* Creates an edge for the control dependences of BB to the vertex V. */
291 static void
292 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
293 int v, control_dependences *cd)
295 bitmap_iterator bi;
296 unsigned edge_n;
297 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
298 0, edge_n, bi)
300 basic_block cond_bb = cd->get_edge (edge_n)->src;
301 gimple stmt = last_stmt (cond_bb);
302 if (stmt && is_ctrl_stmt (stmt))
304 struct graph_edge *e;
305 int c = rdg_vertex_for_stmt (rdg, stmt);
306 if (c < 0)
307 continue;
309 e = add_edge (rdg, c, v);
310 e->data = XNEW (struct rdg_edge);
311 RDGE_TYPE (e) = control_dd;
/* Creates the flow (scalar use-def) edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  /* For every SSA definition of every RDG statement, add flow edges
     to all of its uses inside the RDG.  */
  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
331 /* Creates the edges of the reduced dependence graph RDG. */
333 static void
334 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
336 int i;
338 for (i = 0; i < rdg->n_vertices; i++)
340 gimple stmt = RDG_STMT (rdg, i);
341 if (gimple_code (stmt) == GIMPLE_PHI)
343 edge_iterator ei;
344 edge e;
345 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
346 create_edge_for_control_dependence (rdg, e->src, i, cd);
348 else
349 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs carry no data references.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Collect the data references of STMT; fail when any reference
         cannot be analyzed.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      /* Attach only the references newly found for this statement and
         record whether it reads and/or writes memory.  */
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}
394 /* Initialize STMTS with all the statements of LOOP. The order in
395 which we discover statements is important as
396 generate_loops_for_partition is using the same traversal for
397 identifying statements in loop copies. */
399 static void
400 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
402 unsigned int i;
403 basic_block *bbs = get_loop_body_in_dom_order (loop);
405 for (i = 0; i < loop->num_nodes; i++)
407 basic_block bb = bbs[i];
408 gimple_stmt_iterator bsi;
409 gimple stmt;
411 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
412 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
413 stmts->safe_push (gsi_stmt (bsi));
415 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
417 stmt = gsi_stmt (bsi);
418 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
419 stmts->safe_push (stmt);
423 free (bbs);
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      /* The rdg_edge payload of each outgoing edge is owned by the RDG.  */
      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          /* Invalidate the stmt -> vertex-index mapping set up by
             create_rdg_vertices.  */
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  Returns NULL when the RDG could not be built.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      /* NOTE(review): this only releases the vector; data references
         already attached to vertices are freed by free_rdg, but refs
         discovered for the failing stmt and not yet attached may be
         leaked here -- verify against find_data_references_in_stmt.  */
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
/* The kind of code a partition is turned into: a plain distributed
   loop, or a library builtin call.  */

enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of RDG statements to be distributed into a loop of its
   own (or replaced by a builtin call).  */

typedef struct partition_s
{
  /* Statements in this partition, indexed by RDG vertex number
     (== gimple uid).  */
  bitmap stmts;
  /* Numbers of the loops containing those statements.  */
  bitmap loops;
  /* True when a statement of the partition has scalar uses after
     the loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Number of latch executions used to size the builtin call.  */
  tree niter;
  /* True when NITER needs one added to it (see build_size_arg_loc).  */
  bool plus_one;
} *partition_t;
504 /* Allocate and initialize a partition from BITMAP. */
506 static partition_t
507 partition_alloc (bitmap stmts, bitmap loops)
509 partition_t partition = XCNEW (struct partition_s);
510 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
511 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
512 partition->reduction_p = false;
513 partition->kind = PKIND_NORMAL;
514 return partition;
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  /* Release the owned bitmaps before the partition itself.  */
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  /* The merged result is no longer a recognized builtin pattern.  */
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  /* Merging a reduction partition makes the result a reduction too.  */
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
555 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
556 the LOOP. */
558 static bool
559 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
561 imm_use_iterator imm_iter;
562 use_operand_p use_p;
564 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
566 gimple use_stmt = USE_STMT (use_p);
567 if (!is_gimple_debug (use_stmt)
568 && loop != loop_containing_stmt (use_stmt))
569 return true;
572 return false;
575 /* Returns true when STMT defines a scalar variable used after the
576 loop LOOP. */
578 static bool
579 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
581 def_operand_p def_p;
582 ssa_op_iter op_iter;
584 if (gimple_code (stmt) == GIMPLE_PHI)
585 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
587 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
588 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
589 return true;
591 return false;
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  /* Duplicate the loop body onto the preheader edge, i.e. ahead of
     the original loop.  */
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
611 /* Creates an empty basic block after LOOP. */
613 static void
614 create_bb_after_loop (struct loop *loop)
616 edge exit = single_exit (loop);
618 if (!exit)
619 return;
621 split_edge (exit);
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a fresh copy placed before the original loop.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: detach debug uses of the statements about to be
     removed, so debug info does not reference released SSA names.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple phi = gsi_stmt (bsi);
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  /* Second pass: actually remove the statements.  Note the iterators
     are only advanced when no removal took place.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gimple phi = gsi_stmt (bsi);
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gimple_code (stmt) == GIMPLE_COND)
                {
                  gimple_cond_make_false (stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gimple_switch_set_index
                      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}
721 /* Build the size argument for a memory operation call. */
723 static tree
724 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
725 bool plus_one)
727 tree size = fold_convert_loc (loc, sizetype, nb_iter);
728 if (plus_one)
729 size = size_binop (PLUS_EXPR, size, size_one_node);
730 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
731 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
732 size = fold_convert_loc (loc, size_type_node, size);
733 return size;
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  /* Offset of the access relative to the base address.  */
  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      /* Step back over the NB_BYTES region and re-add one element size
         so the address points at the lowest byte accessed (presumably
         DR points at the highest-addressed element here).  */
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
758 /* If VAL memory representation contains the same value in all bytes,
759 return that value, otherwise return -1.
760 E.g. for 0x24242424 return 0x24, for IEEE double
761 747708026454360457216.0 return 0x44, etc. */
763 static int
764 const_with_all_bytes_same (tree val)
766 unsigned char buf[64];
767 int i, len;
769 if (integer_zerop (val)
770 || real_zerop (val)
771 || (TREE_CODE (val) == CONSTRUCTOR
772 && !TREE_CLOBBER_P (val)
773 && CONSTRUCTOR_NELTS (val) == 0))
774 return 0;
776 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
777 return -1;
779 len = native_encode_expr (val, buf, sizeof (buf));
780 if (len == 0)
781 return -1;
782 for (i = 1; i < len; i++)
783 if (buf[i] != buf[0])
784 return -1;
785 return buf[0];
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Compute the byte count and the start address of the region.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* Widen the value to the int argument type memset expects.  */
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Byte count, destination and source addresses of the copy.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping regions; use memmove when the two
     may alias.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop: make the preheader block fall through directly
     to the exit destination.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* Deleting the loop body may have changed DEST's dominator.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
972 /* Returns a partition with all the statements needed for computing
973 the vertex V of the RDG, also including the loop exit conditions. */
975 static partition_t
976 build_rdg_partition_for_vertex (struct graph *rdg, int v)
978 partition_t partition = partition_alloc (NULL, NULL);
979 auto_vec<int, 3> nodes;
980 unsigned i;
981 int x;
983 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
985 FOR_EACH_VEC_ELT (nodes, i, x)
987 bitmap_set_bit (partition->stmts, x);
988 bitmap_set_bit (partition->loops,
989 loop_containing_stmt (RDG_STMT (rdg, x))->num);
992 return partition;
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   Detects partitions replaceable by memset and by memcpy/memmove; also
   flags reduction partitions.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  /* A builtin needs a computable iteration count.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* When the store executes on the iteration the exit test runs, one
     more element than the latch count is written.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Store of a byte-replicable constant value: memset.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      /* The stored value must be invariant in the loop.  */
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      /* The store must be contiguous and execute on every iteration.  */
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      /* Both accesses must be contiguous with the same stride and
         execute on every iteration.  */
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              /* A positive, non-reversed distance means the store of a
                 later iteration overwrites memory a load still has to
                 read -- not expressible as memmove.  */
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1164 /* For a data reference REF, return the declaration of its base
1165 address or NULL_TREE if the base is not determined. */
1167 static tree
1168 ref_base_address (data_reference_p dr)
1170 tree base_address = DR_BASE_ADDRESS (dr);
1171 if (base_address
1172 && TREE_CODE (base_address) == ADDR_EXPR)
1173 return TREE_OPERAND (base_address, 0);
1175 return base_address;
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  Two references
     are considered similar when they share a base declaration.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  For each seed statement in
   STARTING_STMTS, the partition rooted at its RDG vertex is built
   unless that vertex already belongs to an earlier partition.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  /* Tracks RDG vertices already claimed by some partition.  */
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      /* Ownership of PARTITION transfers to the caller via PARTITIONS.  */
      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
1262 /* Dump to FILE the PARTITIONS. */
1264 static void
1265 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1267 int i;
1268 partition_t partition;
1270 FOR_EACH_VEC_ELT (partitions, i, partition)
1271 debug_bitmap_file (file, partition->stmts);
/* Debug PARTITIONS: dump each partition's statement bitmap to stderr.
   Convenience entry point for use from a debugger.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1283 /* Returns the number of read and write operations in the RDG. */
1285 static int
1286 number_of_rw_in_rdg (struct graph *rdg)
1288 int i, res = 0;
1290 for (i = 0; i < rdg->n_vertices; i++)
1292 if (RDG_MEM_WRITE_STMT (rdg, i))
1293 ++res;
1295 if (RDG_MEM_READS_STMT (rdg, i))
1296 ++res;
1299 return res;
1302 /* Returns the number of read and write operations in a PARTITION of
1303 the RDG. */
1305 static int
1306 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1308 int res = 0;
1309 unsigned i;
1310 bitmap_iterator ii;
1312 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1314 if (RDG_MEM_WRITE_STMT (rdg, i))
1315 ++res;
1317 if (RDG_MEM_READS_STMT (rdg, i))
1318 ++res;
1321 return res;
1324 /* Returns true when one of the PARTITIONS contains all the read or
1325 write operations of RDG. */
1327 static bool
1328 partition_contains_all_rw (struct graph *rdg,
1329 vec<partition_t> partitions)
1331 int i;
1332 partition_t partition;
1333 int nrw = number_of_rw_in_rdg (rdg);
1335 FOR_EACH_VEC_ELT (partitions, i, partition)
1336 if (nrw == number_of_rw_in_partition (rdg, partition))
1337 return true;
1339 return false;
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR encodes
   the accumulated dependence direction: 0 is no dependence, -1 is a
   backward dependence, 1 is forward, 2 means both directions (the
   caller can stop querying then; the partitions will be merged).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	/* Assume a forward dependence until proven otherwise.  */
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  Swapping the
	   refs flips the direction we would report.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Analysis failed: conservatively assume both directions.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    /* A known dependence exists.  Canonicalize its direction.  */
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      /* Else as the distance vector is lexicographic positive
		 swap the dependence direction.  */
	      this_dir = -this_dir;
	  }
	else
	  /* Proven independent.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions across ref pairs: both directions.  */
	  return 2;
      }
  return dir;
}
1406 /* Compare postorder number of the partition graph vertices V1 and V2. */
1408 static int
1409 pgcmp (const void *v1_, const void *v2_)
1411 const vertex *v1 = (const vertex *)v1_;
1412 const vertex *v2 = (const vertex *)v2_;
1413 return v2->post - v1->post;
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.  CD carries the
   control dependences used to build the RDG.  On return *NB_CALLS is
   the number of builtin (library-call) partitions generated.
   Returns the number of distributed loops (excluding those calls).  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify each partition (memset/memcpy builtin, reduction, ...)
     and remember whether any builtin pattern was detected.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }

	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    /* Re-examine the slot the removal moved an element into.  */
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }

	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}

	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      /* Collect the reads and writes of each partition onto its
	 graph vertex.  */
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add dependence edges between every pair of partitions from
	 their write-read, read-write and write-write conflicts.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  Each SCC
	 collapses into its first member.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }

		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Distribution pays off only with at least two remaining partitions,
     or with a single builtin partition, and only when no single
     partition still performs all the memory operations.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

/* Pass metadata for the "ldist" GIMPLE pass.  */

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* The loop distribution pass.  Runs when either full distribution
   (-ftree-loop-distribution) or pattern distribution
   (-ftree-loop-distribute-patterns) is requested.  */

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
/* Main entry of the pass: walk all innermost loops of FUN, seed a
   statement worklist per loop and attempt to distribute it.
   Returns 0 (no TODO flags).  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate all stmt UIDs; build_rdg assigns fresh ones.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  gimple_stmt_iterator gsi;
	  for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple phi = gsi_stmt (gsi);
	      /* Virtual PHIs carry no computation to distribute.  */
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once for the whole
	     function, the first time any loop has seed statements.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cfg manipulation and new virtual definitions require renaming
	 and re-establishing loop-closed SSA form.  */
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1838 } // anon namespace
/* Factory for the loop-distribution pass, called by the pass manager
   (see passes.def).  Caller owns the returned pass object.  */

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}