2013-11-20 Jan-Benedict Glaw <jbglaw@lug-owl.de>
[official-gcc.git] / gcc / tree-loop-distribution.c
blob0afa52aff1e749930f6269989a2e1810d93e38ee
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
61 #include "tree-ssa.h"
62 #include "cfgloop.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "gimple-pretty-print.h"
68 #include "tree-vectorizer.h"
71 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
72 typedef struct rdg_vertex
74 /* The statement represented by this vertex. */
75 gimple stmt;
77 /* Vector of data-references in this statement. */
78 vec<data_reference_p> datarefs;
80 /* True when the statement contains a write to memory. */
81 bool has_mem_write;
83 /* True when the statement contains a read from memory. */
84 bool has_mem_reads;
85 } *rdg_vertex_p;
87 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
88 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
89 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
90 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
91 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
92 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
93 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
94 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
117 /* Dump vertex I in RDG to FILE. */
119 static void
120 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
122 struct vertex *v = &(rdg->vertices[i]);
123 struct graph_edge *e;
125 fprintf (file, "(vertex %d: (%s%s) (in:", i,
126 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
127 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
129 if (v->pred)
130 for (e = v->pred; e; e = e->pred_next)
131 fprintf (file, " %d", e->src);
133 fprintf (file, ") (out:");
135 if (v->succ)
136 for (e = v->succ; e; e = e->succ_next)
137 fprintf (file, " %d", e->dest);
139 fprintf (file, ")\n");
140 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
141 fprintf (file, ")\n");
144 /* Call dump_rdg_vertex on stderr. */
146 DEBUG_FUNCTION void
147 debug_rdg_vertex (struct graph *rdg, int i)
149 dump_rdg_vertex (stderr, rdg, i);
152 /* Dump the reduced dependence graph RDG to FILE. */
154 static void
155 dump_rdg (FILE *file, struct graph *rdg)
157 fprintf (file, "(rdg\n");
158 for (int i = 0; i < rdg->n_vertices; i++)
159 dump_rdg_vertex (file, rdg, i);
160 fprintf (file, ")\n");
163 /* Call dump_rdg on stderr. */
165 DEBUG_FUNCTION void
166 debug_rdg (struct graph *rdg)
168 dump_rdg (stderr, rdg);
171 static void
172 dot_rdg_1 (FILE *file, struct graph *rdg)
174 int i;
175 pretty_printer buffer;
176 pp_needs_newline (&buffer) = false;
177 buffer.buffer->stream = file;
179 fprintf (file, "digraph RDG {\n");
181 for (i = 0; i < rdg->n_vertices; i++)
183 struct vertex *v = &(rdg->vertices[i]);
184 struct graph_edge *e;
186 fprintf (file, "%d [label=\"[%d] ", i, i);
187 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
188 pp_flush (&buffer);
189 fprintf (file, "\"]\n");
191 /* Highlight reads from memory. */
192 if (RDG_MEM_READS_STMT (rdg, i))
193 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
195 /* Highlight stores to memory. */
196 if (RDG_MEM_WRITE_STMT (rdg, i))
197 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
199 if (v->succ)
200 for (e = v->succ; e; e = e->succ_next)
201 switch (RDGE_TYPE (e))
203 case flow_dd:
204 /* These are the most common dependences: don't print these. */
205 fprintf (file, "%d -> %d \n", i, e->dest);
206 break;
208 case control_dd:
209 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
210 break;
212 default:
213 gcc_unreachable ();
217 fprintf (file, "}\n\n");
220 /* Display the Reduced Dependence Graph using dotty. */
222 DEBUG_FUNCTION void
223 dot_rdg (struct graph *rdg)
225 /* When debugging, you may want to enable the following code. */
226 #if 1
227 FILE *file = popen ("dot -Tx11", "w");
228 if (!file)
229 return;
230 dot_rdg_1 (file, rdg);
231 fflush (file);
232 close (fileno (file));
233 pclose (file);
234 #else
235 dot_rdg_1 (stderr, rdg);
236 #endif
239 /* Returns the index of STMT in RDG. */
241 static int
242 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
244 int index = gimple_uid (stmt);
245 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
246 return index;
249 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
250 the index of DEF in RDG. */
252 static void
253 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
255 use_operand_p imm_use_p;
256 imm_use_iterator iterator;
258 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
260 struct graph_edge *e;
261 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
263 if (use < 0)
264 continue;
266 e = add_edge (rdg, idef, use);
267 e->data = XNEW (struct rdg_edge);
268 RDGE_TYPE (e) = flow_dd;
272 /* Creates an edge for the control dependences of BB to the vertex V. */
274 static void
275 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
276 int v, control_dependences *cd)
278 bitmap_iterator bi;
279 unsigned edge_n;
280 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
281 0, edge_n, bi)
283 basic_block cond_bb = cd->get_edge (edge_n)->src;
284 gimple stmt = last_stmt (cond_bb);
285 if (stmt && is_ctrl_stmt (stmt))
287 struct graph_edge *e;
288 int c = rdg_vertex_for_stmt (rdg, stmt);
289 if (c < 0)
290 continue;
292 e = add_edge (rdg, c, v);
293 e->data = XNEW (struct rdg_edge);
294 RDGE_TYPE (e) = control_dd;
299 /* Creates the edges of the reduced dependence graph RDG. */
301 static void
302 create_rdg_flow_edges (struct graph *rdg)
304 int i;
305 def_operand_p def_p;
306 ssa_op_iter iter;
308 for (i = 0; i < rdg->n_vertices; i++)
309 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
310 iter, SSA_OP_DEF)
311 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
314 /* Creates the edges of the reduced dependence graph RDG. */
316 static void
317 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
319 int i;
321 for (i = 0; i < rdg->n_vertices; i++)
323 gimple stmt = RDG_STMT (rdg, i);
324 if (gimple_code (stmt) == GIMPLE_PHI)
326 edge_iterator ei;
327 edge e;
328 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
329 create_edge_for_control_dependence (rdg, e->src, i, cd);
331 else
332 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
336 /* Build the vertices of the reduced dependence graph RDG. Return false
337 if that failed. */
339 static bool
340 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
341 vec<data_reference_p> *datarefs)
343 int i;
344 gimple stmt;
346 FOR_EACH_VEC_ELT (stmts, i, stmt)
348 struct vertex *v = &(rdg->vertices[i]);
350 /* Record statement to vertex mapping. */
351 gimple_set_uid (stmt, i);
353 v->data = XNEW (struct rdg_vertex);
354 RDGV_STMT (v) = stmt;
355 RDGV_DATAREFS (v).create (0);
356 RDGV_HAS_MEM_WRITE (v) = false;
357 RDGV_HAS_MEM_READS (v) = false;
358 if (gimple_code (stmt) == GIMPLE_PHI)
359 continue;
361 unsigned drp = datarefs->length ();
362 if (!find_data_references_in_stmt (loop, stmt, datarefs))
363 return false;
364 for (unsigned j = drp; j < datarefs->length (); ++j)
366 data_reference_p dr = (*datarefs)[j];
367 if (DR_IS_READ (dr))
368 RDGV_HAS_MEM_READS (v) = true;
369 else
370 RDGV_HAS_MEM_WRITE (v) = true;
371 RDGV_DATAREFS (v).safe_push (dr);
374 return true;
377 /* Initialize STMTS with all the statements of LOOP. The order in
378 which we discover statements is important as
379 generate_loops_for_partition is using the same traversal for
380 identifying statements in loop copies. */
382 static void
383 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
385 unsigned int i;
386 basic_block *bbs = get_loop_body_in_dom_order (loop);
388 for (i = 0; i < loop->num_nodes; i++)
390 basic_block bb = bbs[i];
391 gimple_stmt_iterator bsi;
392 gimple stmt;
394 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
395 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
396 stmts->safe_push (gsi_stmt (bsi));
398 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
400 stmt = gsi_stmt (bsi);
401 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
402 stmts->safe_push (stmt);
406 free (bbs);
409 /* Free the reduced dependence graph RDG. */
411 static void
412 free_rdg (struct graph *rdg)
414 int i;
416 for (i = 0; i < rdg->n_vertices; i++)
418 struct vertex *v = &(rdg->vertices[i]);
419 struct graph_edge *e;
421 for (e = v->succ; e; e = e->succ_next)
422 free (e->data);
424 if (v->data)
426 gimple_set_uid (RDGV_STMT (v), -1);
427 free_data_refs (RDGV_DATAREFS (v));
428 free (v->data);
432 free_graph (rdg);
435 /* Build the Reduced Dependence Graph (RDG) with one vertex per
436 statement of the loop nest LOOP_NEST, and one edge per data dependence or
437 scalar dependence. */
439 static struct graph *
440 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
442 struct graph *rdg;
443 vec<data_reference_p> datarefs;
445 /* Create the RDG vertices from the stmts of the loop nest. */
446 stack_vec<gimple, 10> stmts;
447 stmts_from_loop (loop_nest[0], &stmts);
448 rdg = new_graph (stmts.length ());
449 datarefs.create (10);
450 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
452 datarefs.release ();
453 free_rdg (rdg);
454 return NULL;
456 stmts.release ();
458 create_rdg_flow_edges (rdg);
459 if (cd)
460 create_rdg_cd_edges (rdg, cd);
462 datarefs.release ();
464 return rdg;
469 enum partition_kind {
470 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
473 typedef struct partition_s
475 bitmap stmts;
476 bitmap loops;
477 bool reduction_p;
478 enum partition_kind kind;
479 /* data-references a kind != PKIND_NORMAL partition is about. */
480 data_reference_p main_dr;
481 data_reference_p secondary_dr;
482 tree niter;
483 } *partition_t;
486 /* Allocate and initialize a partition from BITMAP. */
488 static partition_t
489 partition_alloc (bitmap stmts, bitmap loops)
491 partition_t partition = XCNEW (struct partition_s);
492 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
493 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
494 partition->reduction_p = false;
495 partition->kind = PKIND_NORMAL;
496 return partition;
499 /* Free PARTITION. */
501 static void
502 partition_free (partition_t partition)
504 BITMAP_FREE (partition->stmts);
505 BITMAP_FREE (partition->loops);
506 free (partition);
509 /* Returns true if the partition can be generated as a builtin. */
511 static bool
512 partition_builtin_p (partition_t partition)
514 return partition->kind != PKIND_NORMAL;
517 /* Returns true if the partition contains a reduction. */
519 static bool
520 partition_reduction_p (partition_t partition)
522 return partition->reduction_p;
525 /* Merge PARTITION into the partition DEST. */
527 static void
528 partition_merge_into (partition_t dest, partition_t partition)
530 dest->kind = PKIND_NORMAL;
531 bitmap_ior_into (dest->stmts, partition->stmts);
532 if (partition_reduction_p (partition))
533 dest->reduction_p = true;
537 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
538 the LOOP. */
540 static bool
541 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
543 imm_use_iterator imm_iter;
544 use_operand_p use_p;
546 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
548 gimple use_stmt = USE_STMT (use_p);
549 if (!is_gimple_debug (use_stmt)
550 && loop != loop_containing_stmt (use_stmt))
551 return true;
554 return false;
557 /* Returns true when STMT defines a scalar variable used after the
558 loop LOOP. */
560 static bool
561 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
563 def_operand_p def_p;
564 ssa_op_iter op_iter;
566 if (gimple_code (stmt) == GIMPLE_PHI)
567 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
569 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
570 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
571 return true;
573 return false;
576 /* Return a copy of LOOP placed before LOOP. */
578 static struct loop *
579 copy_loop_before (struct loop *loop)
581 struct loop *res;
582 edge preheader = loop_preheader_edge (loop);
584 initialize_original_copy_tables ();
585 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
586 gcc_assert (res != NULL);
587 free_original_copy_tables ();
588 delete_update_ssa ();
590 return res;
593 /* Creates an empty basic block after LOOP. */
595 static void
596 create_bb_after_loop (struct loop *loop)
598 edge exit = single_exit (loop);
600 if (!exit)
601 return;
603 split_edge (exit);
606 /* Generate code for PARTITION from the code in LOOP. The loop is
607 copied when COPY_P is true. All the statements not flagged in the
608 PARTITION bitmap are removed from the loop or from its copy. The
609 statements are indexed in sequence inside a basic block, and the
610 basic blocks of a loop are taken in dom order. */
612 static void
613 generate_loops_for_partition (struct loop *loop, partition_t partition,
614 bool copy_p)
616 unsigned i;
617 gimple_stmt_iterator bsi;
618 basic_block *bbs;
620 if (copy_p)
622 loop = copy_loop_before (loop);
623 gcc_assert (loop != NULL);
624 create_preheader (loop, CP_SIMPLE_PREHEADERS);
625 create_bb_after_loop (loop);
628 /* Remove stmts not in the PARTITION bitmap. */
629 bbs = get_loop_body_in_dom_order (loop);
631 if (MAY_HAVE_DEBUG_STMTS)
632 for (i = 0; i < loop->num_nodes; i++)
634 basic_block bb = bbs[i];
636 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
638 gimple phi = gsi_stmt (bsi);
639 if (!virtual_operand_p (gimple_phi_result (phi))
640 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
641 reset_debug_uses (phi);
644 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
646 gimple stmt = gsi_stmt (bsi);
647 if (gimple_code (stmt) != GIMPLE_LABEL
648 && !is_gimple_debug (stmt)
649 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
650 reset_debug_uses (stmt);
654 for (i = 0; i < loop->num_nodes; i++)
656 basic_block bb = bbs[i];
658 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
660 gimple phi = gsi_stmt (bsi);
661 if (!virtual_operand_p (gimple_phi_result (phi))
662 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
663 remove_phi_node (&bsi, true);
664 else
665 gsi_next (&bsi);
668 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
670 gimple stmt = gsi_stmt (bsi);
671 if (gimple_code (stmt) != GIMPLE_LABEL
672 && !is_gimple_debug (stmt)
673 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
675 /* Choose an arbitrary path through the empty CFG part
676 that this unnecessary control stmt controls. */
677 if (gimple_code (stmt) == GIMPLE_COND)
679 gimple_cond_make_false (stmt);
680 update_stmt (stmt);
682 else if (gimple_code (stmt) == GIMPLE_SWITCH)
684 gimple_switch_set_index
685 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
686 update_stmt (stmt);
688 else
690 unlink_stmt_vdef (stmt);
691 gsi_remove (&bsi, true);
692 release_defs (stmt);
693 continue;
696 gsi_next (&bsi);
700 free (bbs);
703 /* Build the size argument for a memory operation call. */
705 static tree
706 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
708 tree size;
709 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
710 fold_convert_loc (loc, sizetype, nb_iter),
711 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
712 return fold_convert_loc (loc, size_type_node, size);
715 /* Build an address argument for a memory operation call. */
717 static tree
718 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
720 tree addr_base;
722 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
723 addr_base = fold_convert_loc (loc, sizetype, addr_base);
725 /* Test for a negative stride, iterating over every element. */
726 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
728 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
729 fold_convert_loc (loc, sizetype, nb_bytes));
730 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
731 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
734 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
737 /* If VAL memory representation contains the same value in all bytes,
738 return that value, otherwise return -1.
739 E.g. for 0x24242424 return 0x24, for IEEE double
740 747708026454360457216.0 return 0x44, etc. */
742 static int
743 const_with_all_bytes_same (tree val)
745 unsigned char buf[64];
746 int i, len;
748 if (integer_zerop (val)
749 || real_zerop (val)
750 || (TREE_CODE (val) == CONSTRUCTOR
751 && !TREE_CLOBBER_P (val)
752 && CONSTRUCTOR_NELTS (val) == 0))
753 return 0;
755 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
756 return -1;
758 len = native_encode_expr (val, buf, sizeof (buf));
759 if (len == 0)
760 return -1;
761 for (i = 1; i < len; i++)
762 if (buf[i] != buf[0])
763 return -1;
764 return buf[0];
767 /* Generate a call to memset for PARTITION in LOOP. */
769 static void
770 generate_memset_builtin (struct loop *loop, partition_t partition)
772 gimple_stmt_iterator gsi;
773 gimple stmt, fn_call;
774 tree mem, fn, nb_bytes;
775 location_t loc;
776 tree val;
778 stmt = DR_STMT (partition->main_dr);
779 loc = gimple_location (stmt);
781 /* The new statements will be placed before LOOP. */
782 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
784 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
785 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
786 false, GSI_CONTINUE_LINKING);
787 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
788 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
789 false, GSI_CONTINUE_LINKING);
791 /* This exactly matches the pattern recognition in classify_partition. */
792 val = gimple_assign_rhs1 (stmt);
793 /* Handle constants like 0x15151515 and similarly
794 floating point constants etc. where all bytes are the same. */
795 int bytev = const_with_all_bytes_same (val);
796 if (bytev != -1)
797 val = build_int_cst (integer_type_node, bytev);
798 else if (TREE_CODE (val) == INTEGER_CST)
799 val = fold_convert (integer_type_node, val);
800 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
802 gimple cstmt;
803 tree tem = make_ssa_name (integer_type_node, NULL);
804 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
805 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
806 val = tem;
809 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
810 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
811 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
813 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "generated memset");
816 if (bytev == 0)
817 fprintf (dump_file, " zero\n");
818 else
819 fprintf (dump_file, "\n");
823 /* Generate a call to memcpy for PARTITION in LOOP. */
825 static void
826 generate_memcpy_builtin (struct loop *loop, partition_t partition)
828 gimple_stmt_iterator gsi;
829 gimple stmt, fn_call;
830 tree dest, src, fn, nb_bytes;
831 location_t loc;
832 enum built_in_function kind;
834 stmt = DR_STMT (partition->main_dr);
835 loc = gimple_location (stmt);
837 /* The new statements will be placed before LOOP. */
838 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
840 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter);
841 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
842 false, GSI_CONTINUE_LINKING);
843 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
844 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
845 if (ptr_derefs_may_alias_p (dest, src))
846 kind = BUILT_IN_MEMMOVE;
847 else
848 kind = BUILT_IN_MEMCPY;
850 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
851 false, GSI_CONTINUE_LINKING);
852 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
853 false, GSI_CONTINUE_LINKING);
854 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
855 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
856 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
858 if (dump_file && (dump_flags & TDF_DETAILS))
860 if (kind == BUILT_IN_MEMCPY)
861 fprintf (dump_file, "generated memcpy\n");
862 else
863 fprintf (dump_file, "generated memmove\n");
867 /* Remove and destroy the loop LOOP. */
869 static void
870 destroy_loop (struct loop *loop)
872 unsigned nbbs = loop->num_nodes;
873 edge exit = single_exit (loop);
874 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
875 basic_block *bbs;
876 unsigned i;
878 bbs = get_loop_body_in_dom_order (loop);
880 redirect_edge_pred (exit, src);
881 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
882 exit->flags |= EDGE_FALLTHRU;
883 cancel_loop_tree (loop);
884 rescan_loop_exit (exit, false, true);
886 for (i = 0; i < nbbs; i++)
888 /* We have made sure to not leave any dangling uses of SSA
889 names defined in the loop. With the exception of virtuals.
890 Make sure we replace all uses of virtual defs that will remain
891 outside of the loop with the bare symbol as delete_basic_block
892 will release them. */
893 gimple_stmt_iterator gsi;
894 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
896 gimple phi = gsi_stmt (gsi);
897 if (virtual_operand_p (gimple_phi_result (phi)))
898 mark_virtual_phi_result_for_renaming (phi);
900 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
902 gimple stmt = gsi_stmt (gsi);
903 tree vdef = gimple_vdef (stmt);
904 if (vdef && TREE_CODE (vdef) == SSA_NAME)
905 mark_virtual_operand_for_renaming (vdef);
907 delete_basic_block (bbs[i]);
909 free (bbs);
911 set_immediate_dominator (CDI_DOMINATORS, dest,
912 recompute_dominator (CDI_DOMINATORS, dest));
915 /* Generates code for PARTITION. */
917 static void
918 generate_code_for_partition (struct loop *loop,
919 partition_t partition, bool copy_p)
921 switch (partition->kind)
923 case PKIND_NORMAL:
924 /* Reductions all have to be in the last partition. */
925 gcc_assert (!partition_reduction_p (partition)
926 || !copy_p);
927 generate_loops_for_partition (loop, partition, copy_p);
928 return;
930 case PKIND_MEMSET:
931 generate_memset_builtin (loop, partition);
932 break;
934 case PKIND_MEMCPY:
935 generate_memcpy_builtin (loop, partition);
936 break;
938 default:
939 gcc_unreachable ();
942 /* Common tail for partitions we turn into a call. If this was the last
943 partition for which we generate code, we have to destroy the loop. */
944 if (!copy_p)
945 destroy_loop (loop);
949 /* Returns a partition with all the statements needed for computing
950 the vertex V of the RDG, also including the loop exit conditions. */
952 static partition_t
953 build_rdg_partition_for_vertex (struct graph *rdg, int v)
955 partition_t partition = partition_alloc (NULL, NULL);
956 stack_vec<int, 3> nodes;
957 unsigned i;
958 int x;
960 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
962 FOR_EACH_VEC_ELT (nodes, i, x)
964 bitmap_set_bit (partition->stmts, x);
965 bitmap_set_bit (partition->loops,
966 loop_containing_stmt (RDG_STMT (rdg, x))->num);
969 return partition;
972 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
973 For the moment we detect only the memset zero pattern. */
975 static void
976 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
978 bitmap_iterator bi;
979 unsigned i;
980 tree nb_iter;
981 data_reference_p single_load, single_store;
982 bool volatiles_p = false;
984 partition->kind = PKIND_NORMAL;
985 partition->main_dr = NULL;
986 partition->secondary_dr = NULL;
987 partition->niter = NULL_TREE;
989 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
991 gimple stmt = RDG_STMT (rdg, i);
993 if (gimple_has_volatile_ops (stmt))
994 volatiles_p = true;
996 /* If the stmt has uses outside of the loop mark it as reduction. */
997 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
999 partition->reduction_p = true;
1000 return;
1004 /* Perform general partition disqualification for builtins. */
1005 if (volatiles_p
1006 || !flag_tree_loop_distribute_patterns)
1007 return;
1009 /* Detect memset and memcpy. */
1010 single_load = NULL;
1011 single_store = NULL;
1012 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1014 gimple stmt = RDG_STMT (rdg, i);
1015 data_reference_p dr;
1016 unsigned j;
1018 if (gimple_code (stmt) == GIMPLE_PHI)
1019 continue;
1021 /* Any scalar stmts are ok. */
1022 if (!gimple_vuse (stmt))
1023 continue;
1025 /* Otherwise just regular loads/stores. */
1026 if (!gimple_assign_single_p (stmt))
1027 return;
1029 /* But exactly one store and/or load. */
1030 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1032 if (DR_IS_READ (dr))
1034 if (single_load != NULL)
1035 return;
1036 single_load = dr;
1038 else
1040 if (single_store != NULL)
1041 return;
1042 single_store = dr;
1047 if (!single_store)
1048 return;
1050 if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1051 gimple_bb (DR_STMT (single_store))))
1052 nb_iter = number_of_latch_executions (loop);
1053 else
1054 nb_iter = number_of_exit_cond_executions (loop);
1055 if (!nb_iter || nb_iter == chrec_dont_know)
1056 return;
1058 if (single_store && !single_load)
1060 gimple stmt = DR_STMT (single_store);
1061 tree rhs = gimple_assign_rhs1 (stmt);
1062 if (const_with_all_bytes_same (rhs) == -1
1063 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1064 || (TYPE_MODE (TREE_TYPE (rhs))
1065 != TYPE_MODE (unsigned_char_type_node))))
1066 return;
1067 if (TREE_CODE (rhs) == SSA_NAME
1068 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1069 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1070 return;
1071 if (!adjacent_dr_p (single_store)
1072 || !dominated_by_p (CDI_DOMINATORS,
1073 loop->latch, gimple_bb (stmt)))
1074 return;
1075 partition->kind = PKIND_MEMSET;
1076 partition->main_dr = single_store;
1077 partition->niter = nb_iter;
1079 else if (single_store && single_load)
1081 gimple store = DR_STMT (single_store);
1082 gimple load = DR_STMT (single_load);
1083 /* Direct aggregate copy or via an SSA name temporary. */
1084 if (load != store
1085 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1086 return;
1087 if (!adjacent_dr_p (single_store)
1088 || !adjacent_dr_p (single_load)
1089 || !operand_equal_p (DR_STEP (single_store),
1090 DR_STEP (single_load), 0)
1091 || !dominated_by_p (CDI_DOMINATORS,
1092 loop->latch, gimple_bb (store)))
1093 return;
1094 /* Now check that if there is a dependence this dependence is
1095 of a suitable form for memmove. */
1096 vec<loop_p> loops = vNULL;
1097 ddr_p ddr;
1098 loops.safe_push (loop);
1099 ddr = initialize_data_dependence_relation (single_load, single_store,
1100 loops);
1101 compute_affine_dependence (ddr, loop);
1102 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1104 free_dependence_relation (ddr);
1105 loops.release ();
1106 return;
1108 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1110 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1112 free_dependence_relation (ddr);
1113 loops.release ();
1114 return;
1116 lambda_vector dist_v;
1117 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1119 int dist = dist_v[index_in_loop_nest (loop->num,
1120 DDR_LOOP_NEST (ddr))];
1121 if (dist > 0 && !DDR_REVERSED_P (ddr))
1123 free_dependence_relation (ddr);
1124 loops.release ();
1125 return;
1129 free_dependence_relation (ddr);
1130 loops.release ();
1131 partition->kind = PKIND_MEMCPY;
1132 partition->main_dr = single_store;
1133 partition->secondary_dr = single_load;
1134 partition->niter = nb_iter;
1138 /* For a data reference REF, return the declaration of its base
1139 address or NULL_TREE if the base is not determined. */
1141 static tree
1142 ref_base_address (data_reference_p dr)
1144 tree base_address = DR_BASE_ADDRESS (dr);
1145 if (base_address
1146 && TREE_CODE (base_address) == ADDR_EXPR)
1147 return TREE_OPERAND (base_address, 0);
1149 return base_address;
1152 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1153 accesses in RDG. */
1155 static bool
1156 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1157 partition_t partition2)
1159 unsigned i, j, k, l;
1160 bitmap_iterator bi, bj;
1161 data_reference_p ref1, ref2;
1163 /* First check whether in the intersection of the two partitions are
1164 any loads or stores. Common loads are the situation that happens
1165 most often. */
1166 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1167 if (RDG_MEM_WRITE_STMT (rdg, i)
1168 || RDG_MEM_READS_STMT (rdg, i))
1169 return true;
1171 /* Then check all data-references against each other. */
1172 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1173 if (RDG_MEM_WRITE_STMT (rdg, i)
1174 || RDG_MEM_READS_STMT (rdg, i))
1175 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1176 if (RDG_MEM_WRITE_STMT (rdg, j)
1177 || RDG_MEM_READS_STMT (rdg, j))
1179 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1181 tree base1 = ref_base_address (ref1);
1182 if (base1)
1183 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1184 if (base1 == ref_base_address (ref2))
1185 return true;
1189 return false;
1192 /* Aggregate several components into a useful partition that is
1193 registered in the PARTITIONS vector. Partitions will be
1194 distributed in different loops. */
1196 static void
1197 rdg_build_partitions (struct graph *rdg,
1198 vec<gimple> starting_stmts,
1199 vec<partition_t> *partitions)
1201 bitmap processed = BITMAP_ALLOC (NULL);
1202 int i;
1203 gimple stmt;
1205 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1207 int v = rdg_vertex_for_stmt (rdg, stmt);
1209 if (dump_file && (dump_flags & TDF_DETAILS))
1210 fprintf (dump_file,
1211 "ldist asked to generate code for vertex %d\n", v);
1213 /* If the vertex is already contained in another partition so
1214 is the partition rooted at it. */
1215 if (bitmap_bit_p (processed, v))
1216 continue;
1218 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1219 bitmap_ior_into (processed, partition->stmts);
1221 if (dump_file && (dump_flags & TDF_DETAILS))
1223 fprintf (dump_file, "ldist useful partition:\n");
1224 dump_bitmap (dump_file, partition->stmts);
1227 partitions->safe_push (partition);
1230 /* All vertices should have been assigned to at least one partition now,
1231 other than vertices belonging to dead code. */
1233 BITMAP_FREE (processed);
1236 /* Dump to FILE the PARTITIONS. */
1238 static void
1239 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1241 int i;
1242 partition_t partition;
1244 FOR_EACH_VEC_ELT (partitions, i, partition)
1245 debug_bitmap_file (file, partition->stmts);
1248 /* Debug PARTITIONS. */
1249 extern void debug_rdg_partitions (vec<partition_t> );
1251 DEBUG_FUNCTION void
1252 debug_rdg_partitions (vec<partition_t> partitions)
1254 dump_rdg_partitions (stderr, partitions);
1257 /* Returns the number of read and write operations in the RDG. */
1259 static int
1260 number_of_rw_in_rdg (struct graph *rdg)
1262 int i, res = 0;
1264 for (i = 0; i < rdg->n_vertices; i++)
1266 if (RDG_MEM_WRITE_STMT (rdg, i))
1267 ++res;
1269 if (RDG_MEM_READS_STMT (rdg, i))
1270 ++res;
1273 return res;
1276 /* Returns the number of read and write operations in a PARTITION of
1277 the RDG. */
1279 static int
1280 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1282 int res = 0;
1283 unsigned i;
1284 bitmap_iterator ii;
1286 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1288 if (RDG_MEM_WRITE_STMT (rdg, i))
1289 ++res;
1291 if (RDG_MEM_READS_STMT (rdg, i))
1292 ++res;
1295 return res;
1298 /* Returns true when one of the PARTITIONS contains all the read or
1299 write operations of RDG. */
1301 static bool
1302 partition_contains_all_rw (struct graph *rdg,
1303 vec<partition_t> partitions)
1305 int i;
1306 partition_t partition;
1307 int nrw = number_of_rw_in_rdg (rdg);
1309 FOR_EACH_VEC_ELT (partitions, i, partition)
1310 if (nrw == number_of_rw_in_partition (rdg, partition))
1311 return true;
1313 return false;
1316 /* Compute partition dependence created by the data references in DRS1
1317 and DRS2 and modify and return DIR according to that. */
1319 static int
1320 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1321 vec<data_reference_p> drs1,
1322 vec<data_reference_p> drs2)
1324 data_reference_p dr1, dr2;
1326 /* dependence direction - 0 is no dependence, -1 is back,
1327 1 is forth, 2 is both (we can stop then, merging will occur). */
1328 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1329 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1331 int this_dir = 1;
1332 ddr_p ddr;
1333 /* Re-shuffle data-refs to be in dominator order. */
1334 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1335 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1337 data_reference_p tem = dr1;
1338 dr1 = dr2;
1339 dr2 = tem;
1340 this_dir = -this_dir;
1342 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1343 compute_affine_dependence (ddr, loops[0]);
1344 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1345 this_dir = 2;
1346 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1348 if (DDR_REVERSED_P (ddr))
1350 data_reference_p tem = dr1;
1351 dr1 = dr2;
1352 dr2 = tem;
1353 this_dir = -this_dir;
1355 /* Known dependences can still be unordered througout the
1356 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1357 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1358 this_dir = 2;
1359 /* If the overlap is exact preserve stmt order. */
1360 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1362 else
1364 /* Else as the distance vector is lexicographic positive
1365 swap the dependence direction. */
1366 this_dir = -this_dir;
1369 else
1370 this_dir = 0;
1371 free_dependence_relation (ddr);
1372 if (dir == 0)
1373 dir = this_dir;
1374 else if (dir != this_dir)
1375 return 2;
1377 return dir;
1380 /* Compare postorder number of the partition graph vertices V1 and V2. */
1382 static int
1383 pgcmp (const void *v1_, const void *v2_)
1385 const vertex *v1 = (const vertex *)v1_;
1386 const vertex *v2 = (const vertex *)v2_;
1387 return v2->post - v1->post;
1390 /* Distributes the code from LOOP in such a way that producer
1391 statements are placed before consumer statements. Tries to separate
1392 only the statements from STMTS into separate loops.
1393 Returns the number of distributed loops. */
1395 static int
1396 distribute_loop (struct loop *loop, vec<gimple> stmts,
1397 control_dependences *cd, int *nb_calls)
1399 struct graph *rdg;
1400 vec<partition_t> partitions;
1401 partition_t partition;
1402 bool any_builtin;
1403 int i, nbp;
1404 graph *pg = NULL;
1405 int num_sccs = 1;
1407 *nb_calls = 0;
1408 stack_vec<loop_p, 3> loop_nest;
1409 if (!find_loop_nest (loop, &loop_nest))
1410 return 0;
1412 rdg = build_rdg (loop_nest, cd);
1413 if (!rdg)
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file,
1417 "Loop %d not distributed: failed to build the RDG.\n",
1418 loop->num);
1420 return 0;
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1424 dump_rdg (dump_file, rdg);
1426 partitions.create (3);
1427 rdg_build_partitions (rdg, stmts, &partitions);
1429 any_builtin = false;
1430 FOR_EACH_VEC_ELT (partitions, i, partition)
1432 classify_partition (loop, rdg, partition);
1433 any_builtin |= partition_builtin_p (partition);
1436 /* If we are only distributing patterns but did not detect any,
1437 simply bail out. */
1438 if (!flag_tree_loop_distribution
1439 && !any_builtin)
1441 nbp = 0;
1442 goto ldist_done;
1445 /* If we are only distributing patterns fuse all partitions that
1446 were not classified as builtins. This also avoids chopping
1447 a loop into pieces, separated by builtin calls. That is, we
1448 only want no or a single loop body remaining. */
1449 partition_t into;
1450 if (!flag_tree_loop_distribution)
1452 for (i = 0; partitions.iterate (i, &into); ++i)
1453 if (!partition_builtin_p (into))
1454 break;
1455 for (++i; partitions.iterate (i, &partition); ++i)
1456 if (!partition_builtin_p (partition))
1458 if (dump_file && (dump_flags & TDF_DETAILS))
1460 fprintf (dump_file, "fusing non-builtin partitions\n");
1461 dump_bitmap (dump_file, into->stmts);
1462 dump_bitmap (dump_file, partition->stmts);
1464 partition_merge_into (into, partition);
1465 partitions.unordered_remove (i);
1466 partition_free (partition);
1467 i--;
1471 /* Due to limitations in the transform phase we have to fuse all
1472 reduction partitions into the last partition so the existing
1473 loop will contain all loop-closed PHI nodes. */
1474 for (i = 0; partitions.iterate (i, &into); ++i)
1475 if (partition_reduction_p (into))
1476 break;
1477 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1478 if (partition_reduction_p (partition))
1480 if (dump_file && (dump_flags & TDF_DETAILS))
1482 fprintf (dump_file, "fusing partitions\n");
1483 dump_bitmap (dump_file, into->stmts);
1484 dump_bitmap (dump_file, partition->stmts);
1485 fprintf (dump_file, "because they have reductions\n");
1487 partition_merge_into (into, partition);
1488 partitions.unordered_remove (i);
1489 partition_free (partition);
1490 i--;
1493 /* Apply our simple cost model - fuse partitions with similar
1494 memory accesses. */
1495 for (i = 0; partitions.iterate (i, &into); ++i)
1497 if (partition_builtin_p (into))
1498 continue;
1499 for (int j = i + 1;
1500 partitions.iterate (j, &partition); ++j)
1502 if (!partition_builtin_p (partition)
1503 && similar_memory_accesses (rdg, into, partition))
1505 if (dump_file && (dump_flags & TDF_DETAILS))
1507 fprintf (dump_file, "fusing partitions\n");
1508 dump_bitmap (dump_file, into->stmts);
1509 dump_bitmap (dump_file, partition->stmts);
1510 fprintf (dump_file, "because they have similar "
1511 "memory accesses\n");
1513 partition_merge_into (into, partition);
1514 partitions.unordered_remove (j);
1515 partition_free (partition);
1516 j--;
1521 /* Build the partition dependency graph. */
1522 if (partitions.length () > 1)
1524 pg = new_graph (partitions.length ());
1525 struct pgdata {
1526 partition_t partition;
1527 vec<data_reference_p> writes;
1528 vec<data_reference_p> reads;
1530 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1531 for (i = 0; partitions.iterate (i, &partition); ++i)
1533 vertex *v = &pg->vertices[i];
1534 pgdata *data = new pgdata;
1535 data_reference_p dr;
1536 /* FIXME - leaks. */
1537 v->data = data;
1538 bitmap_iterator bi;
1539 unsigned j;
1540 data->partition = partition;
1541 data->reads = vNULL;
1542 data->writes = vNULL;
1543 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1544 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1545 if (DR_IS_READ (dr))
1546 data->reads.safe_push (dr);
1547 else
1548 data->writes.safe_push (dr);
1550 partition_t partition1, partition2;
1551 for (i = 0; partitions.iterate (i, &partition1); ++i)
1552 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1554 /* dependence direction - 0 is no dependence, -1 is back,
1555 1 is forth, 2 is both (we can stop then, merging will occur). */
1556 int dir = 0;
1557 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1558 PGDATA(i)->writes,
1559 PGDATA(j)->reads);
1560 if (dir != 2)
1561 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1562 PGDATA(i)->reads,
1563 PGDATA(j)->writes);
1564 if (dir != 2)
1565 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1566 PGDATA(i)->writes,
1567 PGDATA(j)->writes);
1568 if (dir == 1 || dir == 2)
1569 add_edge (pg, i, j);
1570 if (dir == -1 || dir == 2)
1571 add_edge (pg, j, i);
1574 /* Add edges to the reduction partition (if any) to force it last. */
1575 unsigned j;
1576 for (j = 0; partitions.iterate (j, &partition); ++j)
1577 if (partition_reduction_p (partition))
1578 break;
1579 if (j < partitions.length ())
1581 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1582 if (i != j)
1583 add_edge (pg, i, j);
1586 /* Compute partitions we cannot separate and fuse them. */
1587 num_sccs = graphds_scc (pg, NULL);
1588 for (i = 0; i < num_sccs; ++i)
1590 partition_t first;
1591 int j;
1592 for (j = 0; partitions.iterate (j, &first); ++j)
1593 if (pg->vertices[j].component == i)
1594 break;
1595 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1596 if (pg->vertices[j].component == i)
1598 if (dump_file && (dump_flags & TDF_DETAILS))
1600 fprintf (dump_file, "fusing partitions\n");
1601 dump_bitmap (dump_file, first->stmts);
1602 dump_bitmap (dump_file, partition->stmts);
1603 fprintf (dump_file, "because they are in the same "
1604 "dependence SCC\n");
1606 partition_merge_into (first, partition);
1607 partitions[j] = NULL;
1608 partition_free (partition);
1609 PGDATA (j)->partition = NULL;
1613 /* Now order the remaining nodes in postorder. */
1614 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1615 partitions.truncate (0);
1616 for (i = 0; i < pg->n_vertices; ++i)
1618 pgdata *data = PGDATA (i);
1619 if (data->partition)
1620 partitions.safe_push (data->partition);
1621 data->reads.release ();
1622 data->writes.release ();
1623 delete data;
1625 gcc_assert (partitions.length () == (unsigned)num_sccs);
1626 free_graph (pg);
1629 nbp = partitions.length ();
1630 if (nbp == 0
1631 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1632 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1634 nbp = 0;
1635 goto ldist_done;
1638 if (dump_file && (dump_flags & TDF_DETAILS))
1639 dump_rdg_partitions (dump_file, partitions);
1641 FOR_EACH_VEC_ELT (partitions, i, partition)
1643 if (partition_builtin_p (partition))
1644 (*nb_calls)++;
1645 generate_code_for_partition (loop, partition, i < nbp - 1);
1648 ldist_done:
1650 FOR_EACH_VEC_ELT (partitions, i, partition)
1651 partition_free (partition);
1652 partitions.release ();
1654 free_rdg (rdg);
1655 return nbp - *nb_calls;
1658 /* Distribute all loops in the current function. */
1660 static unsigned int
1661 tree_loop_distribution (void)
1663 struct loop *loop;
1664 bool changed = false;
1665 basic_block bb;
1666 control_dependences *cd = NULL;
1668 FOR_ALL_BB (bb)
1670 gimple_stmt_iterator gsi;
1671 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1672 gimple_set_uid (gsi_stmt (gsi), -1);
1673 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1674 gimple_set_uid (gsi_stmt (gsi), -1);
1677 /* We can at the moment only distribute non-nested loops, thus restrict
1678 walking to innermost loops. */
1679 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1681 vec<gimple> work_list = vNULL;
1682 basic_block *bbs;
1683 int num = loop->num;
1684 unsigned int i;
1686 /* If the loop doesn't have a single exit we will fail anyway,
1687 so do that early. */
1688 if (!single_exit (loop))
1689 continue;
1691 /* Only optimize hot loops. */
1692 if (!optimize_loop_for_speed_p (loop))
1693 continue;
1695 /* Initialize the worklist with stmts we seed the partitions with. */
1696 bbs = get_loop_body_in_dom_order (loop);
1697 for (i = 0; i < loop->num_nodes; ++i)
1699 gimple_stmt_iterator gsi;
1700 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1702 gimple phi = gsi_stmt (gsi);
1703 if (virtual_operand_p (gimple_phi_result (phi)))
1704 continue;
1705 /* Distribute stmts which have defs that are used outside of
1706 the loop. */
1707 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1708 continue;
1709 work_list.safe_push (phi);
1711 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1713 gimple stmt = gsi_stmt (gsi);
1715 /* If there is a stmt with side-effects bail out - we
1716 cannot and should not distribute this loop. */
1717 if (gimple_has_side_effects (stmt))
1719 work_list.truncate (0);
1720 goto out;
1723 /* Distribute stmts which have defs that are used outside of
1724 the loop. */
1725 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1727 /* Otherwise only distribute stores for now. */
1728 else if (!gimple_vdef (stmt))
1729 continue;
1731 work_list.safe_push (stmt);
1734 out:
1735 free (bbs);
1737 int nb_generated_loops = 0;
1738 int nb_generated_calls = 0;
1739 location_t loc = find_loop_location (loop);
1740 if (work_list.length () > 0)
1742 if (!cd)
1744 calculate_dominance_info (CDI_DOMINATORS);
1745 calculate_dominance_info (CDI_POST_DOMINATORS);
1746 cd = new control_dependences (create_edge_list ());
1747 free_dominance_info (CDI_POST_DOMINATORS);
1749 nb_generated_loops = distribute_loop (loop, work_list, cd,
1750 &nb_generated_calls);
1753 if (nb_generated_loops + nb_generated_calls > 0)
1755 changed = true;
1756 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1757 loc, "Loop %d distributed: split to %d loops "
1758 "and %d library calls.\n",
1759 num, nb_generated_loops, nb_generated_calls);
1761 else if (dump_file && (dump_flags & TDF_DETAILS))
1762 fprintf (dump_file, "Loop %d is the same.\n", num);
1764 work_list.release ();
1767 if (cd)
1768 delete cd;
1770 if (changed)
1772 mark_virtual_operands_for_renaming (cfun);
1773 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1776 #ifdef ENABLE_CHECKING
1777 verify_loop_structure ();
1778 #endif
1780 return 0;
1783 static bool
1784 gate_tree_loop_distribution (void)
1786 return flag_tree_loop_distribution
1787 || flag_tree_loop_distribute_patterns;
1790 namespace {
1792 const pass_data pass_data_loop_distribution =
1794 GIMPLE_PASS, /* type */
1795 "ldist", /* name */
1796 OPTGROUP_LOOP, /* optinfo_flags */
1797 true, /* has_gate */
1798 true, /* has_execute */
1799 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1800 ( PROP_cfg | PROP_ssa ), /* properties_required */
1801 0, /* properties_provided */
1802 0, /* properties_destroyed */
1803 0, /* todo_flags_start */
1804 TODO_verify_ssa, /* todo_flags_finish */
1807 class pass_loop_distribution : public gimple_opt_pass
1809 public:
1810 pass_loop_distribution (gcc::context *ctxt)
1811 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1814 /* opt_pass methods: */
1815 bool gate () { return gate_tree_loop_distribution (); }
1816 unsigned int execute () { return tree_loop_distribution (); }
1818 }; // class pass_loop_distribution
1820 } // anon namespace
1822 gimple_opt_pass *
1823 make_pass_loop_distribution (gcc::context *ctxt)
1825 return new pass_loop_distribution (ctxt);