/* Loop distribution.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
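/* Overview of the implementation below: the pass works per innermost
   loop.  It seeds partitions from the loop's stores and from statements
   whose results are used after the loop, grows each partition with the
   statements it depends on in the RDG, classifies partitions that match
   a memset or memcpy pattern, fuses partitions that cannot legally or
   profitably be separated, and finally emits one loop or one library
   call per remaining partition.  */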
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
/* Dump RDG to FILE in dot format.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* Flow dependences are the most common; print them
                 without a label.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
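/* The UID read by rdg_vertex_for_stmt is the statement-to-vertex mapping
   recorded by create_rdg_vertices via gimple_set_uid and reset to -1 by
   free_rdg; a UID of -1 thus means the statement has no vertex in the
   current RDG.  */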
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}
/* Creates the flow edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition uses the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
/* Kind of a partition: regular loop code, or a recognized builtin
   pattern.  */

enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;
/* Allocate and initialize a partition from the bitmaps STMTS and LOOPS;
   either may be NULL, in which case a fresh bitmap is allocated.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}
/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
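/* Note that partition_merge_into resets the kind to PKIND_NORMAL: once a
   builtin partition is merged with other statements, the combined
   statements no longer match a single memset/memcpy pattern.  */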
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}
/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
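/* As an illustration: for a store to an array of 4-byte ints, with
   NB_ITER being the N - 1 latch executions of the loop and PLUS_ONE
   set because the store executes on the final iteration too, the size
   computed above folds to ((N - 1) + 1) * 4 bytes.  */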
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
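/* With a negative stride the data reference's first access is the
   highest address touched; subtracting NB_BYTES and adding back one
   element size yields the lowest accessed address, which is what
   memset/memcpy expect as their pointer argument.  */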
/* If VAL's memory representation consists of the same byte repeated,
   return that byte, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
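/* As an illustration, a partition consisting of

     for (i = 0; i < n; ++i)
       a[i] = c;

   where all bytes of C's representation are equal is emitted as
   memset (&a[0], c, n * sizeof (a[0])) in the preheader; when it is
   the last partition generated, the original loop is then removed by
   destroy_loop (see generate_code_for_partition).  */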
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
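/* Since the DFS above walks predecessor edges, the resulting partition
   is the set of statements V transitively depends on in the RDG: its
   producers, plus the control statements they execute under.  */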
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   We detect memset as well as memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
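/* As examples of the above classification: a partition containing only

     for (i = 0; i < n; ++i)
       a[i] = 0;

   becomes PKIND_MEMSET, and one containing only

     for (i = 0; i < n; ++i)
       a[i] = b[i];

   becomes PKIND_MEMCPY; the latter is emitted as memmove rather than
   memcpy when the two references may overlap (see
   generate_memcpy_builtin).  */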
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple> starting_stmts,
                      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition,
         so is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}
/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
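/* partition_contains_all_rw is used by distribute_loop to give up when
   one partition already performs all the memory operations of the RDG;
   splitting off the remaining, memory-free partitions is then unlikely
   to pay off.  */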
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            data_reference_p tem = dr1;
            dr1 = dr2;
            dr2 = tem;
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                data_reference_p tem = dr1;
                dr1 = dr2;
                dr2 = tem;
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
      }
  return dir;
}
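/* To summarize pg_add_dependence_edges: the returned direction is 0 when
   DRS1 and DRS2 are independent, 1 when all dependences go from DRS1's
   partition to DRS2's, -1 for the opposite direction, and 2 when both
   directions (or an unanalyzable dependence) occur; in the latter case
   the caller adds edges in both directions, forcing the two partitions
   into one SCC that is later fused.  */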
/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata
        {
          partition_t partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
        };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          partition_t first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}