2014-07-29 Ed Smith-Rowland <3dw4rd@verizon.net>
[official-gcc.git] / gcc / tree-loop-distribution.c
blobbbf387d6981edb57925a657ec9cca505425db208
1 /* Loop distribution.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass performs loop distribution: for example, the loop
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
29 is transformed to
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "basic-block.h"
49 #include "tree-ssa-alias.h"
50 #include "internal-fn.h"
51 #include "gimple-expr.h"
52 #include "is-a.h"
53 #include "gimple.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "stor-layout.h"
57 #include "gimple-ssa.h"
58 #include "tree-cfg.h"
59 #include "tree-phinodes.h"
60 #include "ssa-iterators.h"
61 #include "stringpool.h"
62 #include "tree-ssanames.h"
63 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-into-ssa.h"
66 #include "tree-ssa.h"
67 #include "cfgloop.h"
68 #include "tree-chrec.h"
69 #include "tree-data-ref.h"
70 #include "tree-scalar-evolution.h"
71 #include "tree-pass.h"
72 #include "gimple-pretty-print.h"
73 #include "tree-vectorizer.h"
76 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
77 typedef struct rdg_vertex
79 /* The statement represented by this vertex. */
80 gimple stmt;
82 /* Vector of data-references in this statement. */
83 vec<data_reference_p> datarefs;
85 /* True when the statement contains a write to memory. */
86 bool has_mem_write;
88 /* True when the statement contains a read from memory. */
89 bool has_mem_reads;
90 } *rdg_vertex_p;
92 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
93 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
94 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
95 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
96 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
97 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
98 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
99 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
101 /* Data dependence type. */
103 enum rdg_dep_type
105 /* Read After Write (RAW). */
106 flow_dd = 'f',
108 /* Control dependence (execute conditional on). */
109 control_dd = 'c'
112 /* Dependence information attached to an edge of the RDG. */
114 typedef struct rdg_edge
116 /* Type of the dependence. */
117 enum rdg_dep_type type;
118 } *rdg_edge_p;
120 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
122 /* Dump vertex I in RDG to FILE. */
124 static void
125 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
127 struct vertex *v = &(rdg->vertices[i]);
128 struct graph_edge *e;
130 fprintf (file, "(vertex %d: (%s%s) (in:", i,
131 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
132 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
134 if (v->pred)
135 for (e = v->pred; e; e = e->pred_next)
136 fprintf (file, " %d", e->src);
138 fprintf (file, ") (out:");
140 if (v->succ)
141 for (e = v->succ; e; e = e->succ_next)
142 fprintf (file, " %d", e->dest);
144 fprintf (file, ")\n");
145 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
146 fprintf (file, ")\n");
149 /* Call dump_rdg_vertex on stderr. */
151 DEBUG_FUNCTION void
152 debug_rdg_vertex (struct graph *rdg, int i)
154 dump_rdg_vertex (stderr, rdg, i);
157 /* Dump the reduced dependence graph RDG to FILE. */
159 static void
160 dump_rdg (FILE *file, struct graph *rdg)
162 fprintf (file, "(rdg\n");
163 for (int i = 0; i < rdg->n_vertices; i++)
164 dump_rdg_vertex (file, rdg, i);
165 fprintf (file, ")\n");
168 /* Call dump_rdg on stderr. */
170 DEBUG_FUNCTION void
171 debug_rdg (struct graph *rdg)
173 dump_rdg (stderr, rdg);
176 static void
177 dot_rdg_1 (FILE *file, struct graph *rdg)
179 int i;
180 pretty_printer buffer;
181 pp_needs_newline (&buffer) = false;
182 buffer.buffer->stream = file;
184 fprintf (file, "digraph RDG {\n");
186 for (i = 0; i < rdg->n_vertices; i++)
188 struct vertex *v = &(rdg->vertices[i]);
189 struct graph_edge *e;
191 fprintf (file, "%d [label=\"[%d] ", i, i);
192 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
193 pp_flush (&buffer);
194 fprintf (file, "\"]\n");
196 /* Highlight reads from memory. */
197 if (RDG_MEM_READS_STMT (rdg, i))
198 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
200 /* Highlight stores to memory. */
201 if (RDG_MEM_WRITE_STMT (rdg, i))
202 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
204 if (v->succ)
205 for (e = v->succ; e; e = e->succ_next)
206 switch (RDGE_TYPE (e))
208 case flow_dd:
209 /* These are the most common dependences: don't print these. */
210 fprintf (file, "%d -> %d \n", i, e->dest);
211 break;
213 case control_dd:
214 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
215 break;
217 default:
218 gcc_unreachable ();
222 fprintf (file, "}\n\n");
225 /* Display the Reduced Dependence Graph using dotty. */
227 DEBUG_FUNCTION void
228 dot_rdg (struct graph *rdg)
230 /* When debugging, you may want to enable the following code. */
231 #if 1
232 FILE *file = popen ("dot -Tx11", "w");
233 if (!file)
234 return;
235 dot_rdg_1 (file, rdg);
236 fflush (file);
237 close (fileno (file));
238 pclose (file);
239 #else
240 dot_rdg_1 (stderr, rdg);
241 #endif
244 /* Returns the index of STMT in RDG. */
246 static int
247 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
249 int index = gimple_uid (stmt);
250 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
251 return index;
254 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
255 the index of DEF in RDG. */
257 static void
258 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
260 use_operand_p imm_use_p;
261 imm_use_iterator iterator;
263 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
265 struct graph_edge *e;
266 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
268 if (use < 0)
269 continue;
271 e = add_edge (rdg, idef, use);
272 e->data = XNEW (struct rdg_edge);
273 RDGE_TYPE (e) = flow_dd;
277 /* Creates an edge for the control dependences of BB to the vertex V. */
279 static void
280 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
281 int v, control_dependences *cd)
283 bitmap_iterator bi;
284 unsigned edge_n;
285 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
286 0, edge_n, bi)
288 basic_block cond_bb = cd->get_edge (edge_n)->src;
289 gimple stmt = last_stmt (cond_bb);
290 if (stmt && is_ctrl_stmt (stmt))
292 struct graph_edge *e;
293 int c = rdg_vertex_for_stmt (rdg, stmt);
294 if (c < 0)
295 continue;
297 e = add_edge (rdg, c, v);
298 e->data = XNEW (struct rdg_edge);
299 RDGE_TYPE (e) = control_dd;
304 /* Creates the edges of the reduced dependence graph RDG. */
306 static void
307 create_rdg_flow_edges (struct graph *rdg)
309 int i;
310 def_operand_p def_p;
311 ssa_op_iter iter;
313 for (i = 0; i < rdg->n_vertices; i++)
314 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
315 iter, SSA_OP_DEF)
316 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
319 /* Creates the edges of the reduced dependence graph RDG. */
321 static void
322 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
324 int i;
326 for (i = 0; i < rdg->n_vertices; i++)
328 gimple stmt = RDG_STMT (rdg, i);
329 if (gimple_code (stmt) == GIMPLE_PHI)
331 edge_iterator ei;
332 edge e;
333 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
334 create_edge_for_control_dependence (rdg, e->src, i, cd);
336 else
337 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
341 /* Build the vertices of the reduced dependence graph RDG. Return false
342 if that failed. */
344 static bool
345 create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
346 vec<data_reference_p> *datarefs)
348 int i;
349 gimple stmt;
351 FOR_EACH_VEC_ELT (stmts, i, stmt)
353 struct vertex *v = &(rdg->vertices[i]);
355 /* Record statement to vertex mapping. */
356 gimple_set_uid (stmt, i);
358 v->data = XNEW (struct rdg_vertex);
359 RDGV_STMT (v) = stmt;
360 RDGV_DATAREFS (v).create (0);
361 RDGV_HAS_MEM_WRITE (v) = false;
362 RDGV_HAS_MEM_READS (v) = false;
363 if (gimple_code (stmt) == GIMPLE_PHI)
364 continue;
366 unsigned drp = datarefs->length ();
367 if (!find_data_references_in_stmt (loop, stmt, datarefs))
368 return false;
369 for (unsigned j = drp; j < datarefs->length (); ++j)
371 data_reference_p dr = (*datarefs)[j];
372 if (DR_IS_READ (dr))
373 RDGV_HAS_MEM_READS (v) = true;
374 else
375 RDGV_HAS_MEM_WRITE (v) = true;
376 RDGV_DATAREFS (v).safe_push (dr);
379 return true;
382 /* Initialize STMTS with all the statements of LOOP. The order in
383 which we discover statements is important as
384 generate_loops_for_partition is using the same traversal for
385 identifying statements in loop copies. */
387 static void
388 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
390 unsigned int i;
391 basic_block *bbs = get_loop_body_in_dom_order (loop);
393 for (i = 0; i < loop->num_nodes; i++)
395 basic_block bb = bbs[i];
396 gimple_stmt_iterator bsi;
397 gimple stmt;
399 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
400 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
401 stmts->safe_push (gsi_stmt (bsi));
403 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
405 stmt = gsi_stmt (bsi);
406 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
407 stmts->safe_push (stmt);
411 free (bbs);
414 /* Free the reduced dependence graph RDG. */
416 static void
417 free_rdg (struct graph *rdg)
419 int i;
421 for (i = 0; i < rdg->n_vertices; i++)
423 struct vertex *v = &(rdg->vertices[i]);
424 struct graph_edge *e;
426 for (e = v->succ; e; e = e->succ_next)
427 free (e->data);
429 if (v->data)
431 gimple_set_uid (RDGV_STMT (v), -1);
432 free_data_refs (RDGV_DATAREFS (v));
433 free (v->data);
437 free_graph (rdg);
440 /* Build the Reduced Dependence Graph (RDG) with one vertex per
441 statement of the loop nest LOOP_NEST, and one edge per data dependence or
442 scalar dependence. */
444 static struct graph *
445 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
447 struct graph *rdg;
448 vec<data_reference_p> datarefs;
450 /* Create the RDG vertices from the stmts of the loop nest. */
451 auto_vec<gimple, 10> stmts;
452 stmts_from_loop (loop_nest[0], &stmts);
453 rdg = new_graph (stmts.length ());
454 datarefs.create (10);
455 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
457 datarefs.release ();
458 free_rdg (rdg);
459 return NULL;
461 stmts.release ();
463 create_rdg_flow_edges (rdg);
464 if (cd)
465 create_rdg_cd_edges (rdg, cd);
467 datarefs.release ();
469 return rdg;
474 enum partition_kind {
475 PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
478 typedef struct partition_s
480 bitmap stmts;
481 bitmap loops;
482 bool reduction_p;
483 enum partition_kind kind;
484 /* data-references a kind != PKIND_NORMAL partition is about. */
485 data_reference_p main_dr;
486 data_reference_p secondary_dr;
487 tree niter;
488 bool plus_one;
489 } *partition_t;
492 /* Allocate and initialize a partition from BITMAP. */
494 static partition_t
495 partition_alloc (bitmap stmts, bitmap loops)
497 partition_t partition = XCNEW (struct partition_s);
498 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
499 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
500 partition->reduction_p = false;
501 partition->kind = PKIND_NORMAL;
502 return partition;
505 /* Free PARTITION. */
507 static void
508 partition_free (partition_t partition)
510 BITMAP_FREE (partition->stmts);
511 BITMAP_FREE (partition->loops);
512 free (partition);
515 /* Returns true if the partition can be generated as a builtin. */
517 static bool
518 partition_builtin_p (partition_t partition)
520 return partition->kind != PKIND_NORMAL;
523 /* Returns true if the partition contains a reduction. */
525 static bool
526 partition_reduction_p (partition_t partition)
528 return partition->reduction_p;
531 /* Merge PARTITION into the partition DEST. */
533 static void
534 partition_merge_into (partition_t dest, partition_t partition)
536 dest->kind = PKIND_NORMAL;
537 bitmap_ior_into (dest->stmts, partition->stmts);
538 if (partition_reduction_p (partition))
539 dest->reduction_p = true;
543 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
544 the LOOP. */
546 static bool
547 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
549 imm_use_iterator imm_iter;
550 use_operand_p use_p;
552 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
554 gimple use_stmt = USE_STMT (use_p);
555 if (!is_gimple_debug (use_stmt)
556 && loop != loop_containing_stmt (use_stmt))
557 return true;
560 return false;
563 /* Returns true when STMT defines a scalar variable used after the
564 loop LOOP. */
566 static bool
567 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
569 def_operand_p def_p;
570 ssa_op_iter op_iter;
572 if (gimple_code (stmt) == GIMPLE_PHI)
573 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
575 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
576 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
577 return true;
579 return false;
582 /* Return a copy of LOOP placed before LOOP. */
584 static struct loop *
585 copy_loop_before (struct loop *loop)
587 struct loop *res;
588 edge preheader = loop_preheader_edge (loop);
590 initialize_original_copy_tables ();
591 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
592 gcc_assert (res != NULL);
593 free_original_copy_tables ();
594 delete_update_ssa ();
596 return res;
599 /* Creates an empty basic block after LOOP. */
601 static void
602 create_bb_after_loop (struct loop *loop)
604 edge exit = single_exit (loop);
606 if (!exit)
607 return;
609 split_edge (exit);
612 /* Generate code for PARTITION from the code in LOOP. The loop is
613 copied when COPY_P is true. All the statements not flagged in the
614 PARTITION bitmap are removed from the loop or from its copy. The
615 statements are indexed in sequence inside a basic block, and the
616 basic blocks of a loop are taken in dom order. */
618 static void
619 generate_loops_for_partition (struct loop *loop, partition_t partition,
620 bool copy_p)
622 unsigned i;
623 gimple_stmt_iterator bsi;
624 basic_block *bbs;
626 if (copy_p)
628 loop = copy_loop_before (loop);
629 gcc_assert (loop != NULL);
630 create_preheader (loop, CP_SIMPLE_PREHEADERS);
631 create_bb_after_loop (loop);
634 /* Remove stmts not in the PARTITION bitmap. */
635 bbs = get_loop_body_in_dom_order (loop);
637 if (MAY_HAVE_DEBUG_STMTS)
638 for (i = 0; i < loop->num_nodes; i++)
640 basic_block bb = bbs[i];
642 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
644 gimple phi = gsi_stmt (bsi);
645 if (!virtual_operand_p (gimple_phi_result (phi))
646 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
647 reset_debug_uses (phi);
650 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
652 gimple stmt = gsi_stmt (bsi);
653 if (gimple_code (stmt) != GIMPLE_LABEL
654 && !is_gimple_debug (stmt)
655 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
656 reset_debug_uses (stmt);
660 for (i = 0; i < loop->num_nodes; i++)
662 basic_block bb = bbs[i];
664 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
666 gimple phi = gsi_stmt (bsi);
667 if (!virtual_operand_p (gimple_phi_result (phi))
668 && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
669 remove_phi_node (&bsi, true);
670 else
671 gsi_next (&bsi);
674 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
676 gimple stmt = gsi_stmt (bsi);
677 if (gimple_code (stmt) != GIMPLE_LABEL
678 && !is_gimple_debug (stmt)
679 && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
681 /* Choose an arbitrary path through the empty CFG part
682 that this unnecessary control stmt controls. */
683 if (gimple_code (stmt) == GIMPLE_COND)
685 gimple_cond_make_false (stmt);
686 update_stmt (stmt);
688 else if (gimple_code (stmt) == GIMPLE_SWITCH)
690 gimple_switch_set_index
691 (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
692 update_stmt (stmt);
694 else
696 unlink_stmt_vdef (stmt);
697 gsi_remove (&bsi, true);
698 release_defs (stmt);
699 continue;
702 gsi_next (&bsi);
706 free (bbs);
709 /* Build the size argument for a memory operation call. */
711 static tree
712 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
713 bool plus_one)
715 tree size = fold_convert_loc (loc, sizetype, nb_iter);
716 if (plus_one)
717 size = size_binop (PLUS_EXPR, size, size_one_node);
718 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
719 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
720 size = fold_convert_loc (loc, size_type_node, size);
721 return size;
724 /* Build an address argument for a memory operation call. */
726 static tree
727 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
729 tree addr_base;
731 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
732 addr_base = fold_convert_loc (loc, sizetype, addr_base);
734 /* Test for a negative stride, iterating over every element. */
735 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
737 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
738 fold_convert_loc (loc, sizetype, nb_bytes));
739 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
740 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
743 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
746 /* If VAL memory representation contains the same value in all bytes,
747 return that value, otherwise return -1.
748 E.g. for 0x24242424 return 0x24, for IEEE double
749 747708026454360457216.0 return 0x44, etc. */
751 static int
752 const_with_all_bytes_same (tree val)
754 unsigned char buf[64];
755 int i, len;
757 if (integer_zerop (val)
758 || real_zerop (val)
759 || (TREE_CODE (val) == CONSTRUCTOR
760 && !TREE_CLOBBER_P (val)
761 && CONSTRUCTOR_NELTS (val) == 0))
762 return 0;
764 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
765 return -1;
767 len = native_encode_expr (val, buf, sizeof (buf));
768 if (len == 0)
769 return -1;
770 for (i = 1; i < len; i++)
771 if (buf[i] != buf[0])
772 return -1;
773 return buf[0];
776 /* Generate a call to memset for PARTITION in LOOP. */
778 static void
779 generate_memset_builtin (struct loop *loop, partition_t partition)
781 gimple_stmt_iterator gsi;
782 gimple stmt, fn_call;
783 tree mem, fn, nb_bytes;
784 location_t loc;
785 tree val;
787 stmt = DR_STMT (partition->main_dr);
788 loc = gimple_location (stmt);
790 /* The new statements will be placed before LOOP. */
791 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
793 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
794 partition->plus_one);
795 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
796 false, GSI_CONTINUE_LINKING);
797 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
798 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
799 false, GSI_CONTINUE_LINKING);
801 /* This exactly matches the pattern recognition in classify_partition. */
802 val = gimple_assign_rhs1 (stmt);
803 /* Handle constants like 0x15151515 and similarly
804 floating point constants etc. where all bytes are the same. */
805 int bytev = const_with_all_bytes_same (val);
806 if (bytev != -1)
807 val = build_int_cst (integer_type_node, bytev);
808 else if (TREE_CODE (val) == INTEGER_CST)
809 val = fold_convert (integer_type_node, val);
810 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
812 gimple cstmt;
813 tree tem = make_ssa_name (integer_type_node, NULL);
814 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
815 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
816 val = tem;
819 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
820 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
821 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
823 if (dump_file && (dump_flags & TDF_DETAILS))
825 fprintf (dump_file, "generated memset");
826 if (bytev == 0)
827 fprintf (dump_file, " zero\n");
828 else
829 fprintf (dump_file, "\n");
833 /* Generate a call to memcpy for PARTITION in LOOP. */
835 static void
836 generate_memcpy_builtin (struct loop *loop, partition_t partition)
838 gimple_stmt_iterator gsi;
839 gimple stmt, fn_call;
840 tree dest, src, fn, nb_bytes;
841 location_t loc;
842 enum built_in_function kind;
844 stmt = DR_STMT (partition->main_dr);
845 loc = gimple_location (stmt);
847 /* The new statements will be placed before LOOP. */
848 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
850 nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
851 partition->plus_one);
852 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
853 false, GSI_CONTINUE_LINKING);
854 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
855 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
856 if (ptr_derefs_may_alias_p (dest, src))
857 kind = BUILT_IN_MEMMOVE;
858 else
859 kind = BUILT_IN_MEMCPY;
861 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
862 false, GSI_CONTINUE_LINKING);
863 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
864 false, GSI_CONTINUE_LINKING);
865 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
866 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
867 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
869 if (dump_file && (dump_flags & TDF_DETAILS))
871 if (kind == BUILT_IN_MEMCPY)
872 fprintf (dump_file, "generated memcpy\n");
873 else
874 fprintf (dump_file, "generated memmove\n");
878 /* Remove and destroy the loop LOOP. */
880 static void
881 destroy_loop (struct loop *loop)
883 unsigned nbbs = loop->num_nodes;
884 edge exit = single_exit (loop);
885 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
886 basic_block *bbs;
887 unsigned i;
889 bbs = get_loop_body_in_dom_order (loop);
891 redirect_edge_pred (exit, src);
892 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
893 exit->flags |= EDGE_FALLTHRU;
894 cancel_loop_tree (loop);
895 rescan_loop_exit (exit, false, true);
897 for (i = 0; i < nbbs; i++)
899 /* We have made sure to not leave any dangling uses of SSA
900 names defined in the loop. With the exception of virtuals.
901 Make sure we replace all uses of virtual defs that will remain
902 outside of the loop with the bare symbol as delete_basic_block
903 will release them. */
904 gimple_stmt_iterator gsi;
905 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
907 gimple phi = gsi_stmt (gsi);
908 if (virtual_operand_p (gimple_phi_result (phi)))
909 mark_virtual_phi_result_for_renaming (phi);
911 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
913 gimple stmt = gsi_stmt (gsi);
914 tree vdef = gimple_vdef (stmt);
915 if (vdef && TREE_CODE (vdef) == SSA_NAME)
916 mark_virtual_operand_for_renaming (vdef);
918 delete_basic_block (bbs[i]);
920 free (bbs);
922 set_immediate_dominator (CDI_DOMINATORS, dest,
923 recompute_dominator (CDI_DOMINATORS, dest));
926 /* Generates code for PARTITION. */
928 static void
929 generate_code_for_partition (struct loop *loop,
930 partition_t partition, bool copy_p)
932 switch (partition->kind)
934 case PKIND_NORMAL:
935 /* Reductions all have to be in the last partition. */
936 gcc_assert (!partition_reduction_p (partition)
937 || !copy_p);
938 generate_loops_for_partition (loop, partition, copy_p);
939 return;
941 case PKIND_MEMSET:
942 generate_memset_builtin (loop, partition);
943 break;
945 case PKIND_MEMCPY:
946 generate_memcpy_builtin (loop, partition);
947 break;
949 default:
950 gcc_unreachable ();
953 /* Common tail for partitions we turn into a call. If this was the last
954 partition for which we generate code, we have to destroy the loop. */
955 if (!copy_p)
956 destroy_loop (loop);
960 /* Returns a partition with all the statements needed for computing
961 the vertex V of the RDG, also including the loop exit conditions. */
963 static partition_t
964 build_rdg_partition_for_vertex (struct graph *rdg, int v)
966 partition_t partition = partition_alloc (NULL, NULL);
967 auto_vec<int, 3> nodes;
968 unsigned i;
969 int x;
971 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
973 FOR_EACH_VEC_ELT (nodes, i, x)
975 bitmap_set_bit (partition->stmts, x);
976 bitmap_set_bit (partition->loops,
977 loop_containing_stmt (RDG_STMT (rdg, x))->num);
980 return partition;
983 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
984 For the moment we detect only the memset zero pattern. */
986 static void
987 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
989 bitmap_iterator bi;
990 unsigned i;
991 tree nb_iter;
992 data_reference_p single_load, single_store;
993 bool volatiles_p = false;
994 bool plus_one = false;
996 partition->kind = PKIND_NORMAL;
997 partition->main_dr = NULL;
998 partition->secondary_dr = NULL;
999 partition->niter = NULL_TREE;
1000 partition->plus_one = false;
1002 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1004 gimple stmt = RDG_STMT (rdg, i);
1006 if (gimple_has_volatile_ops (stmt))
1007 volatiles_p = true;
1009 /* If the stmt has uses outside of the loop mark it as reduction. */
1010 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1012 partition->reduction_p = true;
1013 return;
1017 /* Perform general partition disqualification for builtins. */
1018 if (volatiles_p
1019 || !flag_tree_loop_distribute_patterns)
1020 return;
1022 /* Detect memset and memcpy. */
1023 single_load = NULL;
1024 single_store = NULL;
1025 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1027 gimple stmt = RDG_STMT (rdg, i);
1028 data_reference_p dr;
1029 unsigned j;
1031 if (gimple_code (stmt) == GIMPLE_PHI)
1032 continue;
1034 /* Any scalar stmts are ok. */
1035 if (!gimple_vuse (stmt))
1036 continue;
1038 /* Otherwise just regular loads/stores. */
1039 if (!gimple_assign_single_p (stmt))
1040 return;
1042 /* But exactly one store and/or load. */
1043 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1045 if (DR_IS_READ (dr))
1047 if (single_load != NULL)
1048 return;
1049 single_load = dr;
1051 else
1053 if (single_store != NULL)
1054 return;
1055 single_store = dr;
1060 if (!single_store)
1061 return;
1063 nb_iter = number_of_latch_executions (loop);
1064 if (!nb_iter || nb_iter == chrec_dont_know)
1065 return;
1066 if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1067 gimple_bb (DR_STMT (single_store))))
1068 plus_one = true;
1070 if (single_store && !single_load)
1072 gimple stmt = DR_STMT (single_store);
1073 tree rhs = gimple_assign_rhs1 (stmt);
1074 if (const_with_all_bytes_same (rhs) == -1
1075 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1076 || (TYPE_MODE (TREE_TYPE (rhs))
1077 != TYPE_MODE (unsigned_char_type_node))))
1078 return;
1079 if (TREE_CODE (rhs) == SSA_NAME
1080 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1081 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1082 return;
1083 if (!adjacent_dr_p (single_store)
1084 || !dominated_by_p (CDI_DOMINATORS,
1085 loop->latch, gimple_bb (stmt)))
1086 return;
1087 partition->kind = PKIND_MEMSET;
1088 partition->main_dr = single_store;
1089 partition->niter = nb_iter;
1090 partition->plus_one = plus_one;
1092 else if (single_store && single_load)
1094 gimple store = DR_STMT (single_store);
1095 gimple load = DR_STMT (single_load);
1096 /* Direct aggregate copy or via an SSA name temporary. */
1097 if (load != store
1098 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1099 return;
1100 if (!adjacent_dr_p (single_store)
1101 || !adjacent_dr_p (single_load)
1102 || !operand_equal_p (DR_STEP (single_store),
1103 DR_STEP (single_load), 0)
1104 || !dominated_by_p (CDI_DOMINATORS,
1105 loop->latch, gimple_bb (store)))
1106 return;
1107 /* Now check that if there is a dependence this dependence is
1108 of a suitable form for memmove. */
1109 vec<loop_p> loops = vNULL;
1110 ddr_p ddr;
1111 loops.safe_push (loop);
1112 ddr = initialize_data_dependence_relation (single_load, single_store,
1113 loops);
1114 compute_affine_dependence (ddr, loop);
1115 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1117 free_dependence_relation (ddr);
1118 loops.release ();
1119 return;
1121 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1123 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1125 free_dependence_relation (ddr);
1126 loops.release ();
1127 return;
1129 lambda_vector dist_v;
1130 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1132 int dist = dist_v[index_in_loop_nest (loop->num,
1133 DDR_LOOP_NEST (ddr))];
1134 if (dist > 0 && !DDR_REVERSED_P (ddr))
1136 free_dependence_relation (ddr);
1137 loops.release ();
1138 return;
1142 free_dependence_relation (ddr);
1143 loops.release ();
1144 partition->kind = PKIND_MEMCPY;
1145 partition->main_dr = single_store;
1146 partition->secondary_dr = single_load;
1147 partition->niter = nb_iter;
1148 partition->plus_one = plus_one;
1152 /* For a data reference REF, return the declaration of its base
1153 address or NULL_TREE if the base is not determined. */
1155 static tree
1156 ref_base_address (data_reference_p dr)
1158 tree base_address = DR_BASE_ADDRESS (dr);
1159 if (base_address
1160 && TREE_CODE (base_address) == ADDR_EXPR)
1161 return TREE_OPERAND (base_address, 0);
1163 return base_address;
1166 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1167 accesses in RDG. */
1169 static bool
1170 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1171 partition_t partition2)
1173 unsigned i, j, k, l;
1174 bitmap_iterator bi, bj;
1175 data_reference_p ref1, ref2;
1177 /* First check whether in the intersection of the two partitions are
1178 any loads or stores. Common loads are the situation that happens
1179 most often. */
1180 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1181 if (RDG_MEM_WRITE_STMT (rdg, i)
1182 || RDG_MEM_READS_STMT (rdg, i))
1183 return true;
1185 /* Then check all data-references against each other. */
1186 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1187 if (RDG_MEM_WRITE_STMT (rdg, i)
1188 || RDG_MEM_READS_STMT (rdg, i))
1189 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1190 if (RDG_MEM_WRITE_STMT (rdg, j)
1191 || RDG_MEM_READS_STMT (rdg, j))
1193 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1195 tree base1 = ref_base_address (ref1);
1196 if (base1)
1197 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1198 if (base1 == ref_base_address (ref2))
1199 return true;
1203 return false;
1206 /* Aggregate several components into a useful partition that is
1207 registered in the PARTITIONS vector. Partitions will be
1208 distributed in different loops. */
1210 static void
1211 rdg_build_partitions (struct graph *rdg,
1212 vec<gimple> starting_stmts,
1213 vec<partition_t> *partitions)
1215 bitmap processed = BITMAP_ALLOC (NULL);
1216 int i;
1217 gimple stmt;
1219 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1221 int v = rdg_vertex_for_stmt (rdg, stmt);
1223 if (dump_file && (dump_flags & TDF_DETAILS))
1224 fprintf (dump_file,
1225 "ldist asked to generate code for vertex %d\n", v);
1227 /* If the vertex is already contained in another partition so
1228 is the partition rooted at it. */
1229 if (bitmap_bit_p (processed, v))
1230 continue;
1232 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1233 bitmap_ior_into (processed, partition->stmts);
1235 if (dump_file && (dump_flags & TDF_DETAILS))
1237 fprintf (dump_file, "ldist useful partition:\n");
1238 dump_bitmap (dump_file, partition->stmts);
1241 partitions->safe_push (partition);
1244 /* All vertices should have been assigned to at least one partition now,
1245 other than vertices belonging to dead code. */
1247 BITMAP_FREE (processed);
1250 /* Dump to FILE the PARTITIONS. */
1252 static void
1253 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1255 int i;
1256 partition_t partition;
1258 FOR_EACH_VEC_ELT (partitions, i, partition)
1259 debug_bitmap_file (file, partition->stmts);
1262 /* Debug PARTITIONS. */
1263 extern void debug_rdg_partitions (vec<partition_t> );
1265 DEBUG_FUNCTION void
1266 debug_rdg_partitions (vec<partition_t> partitions)
1268 dump_rdg_partitions (stderr, partitions);
1271 /* Returns the number of read and write operations in the RDG. */
1273 static int
1274 number_of_rw_in_rdg (struct graph *rdg)
1276 int i, res = 0;
1278 for (i = 0; i < rdg->n_vertices; i++)
1280 if (RDG_MEM_WRITE_STMT (rdg, i))
1281 ++res;
1283 if (RDG_MEM_READS_STMT (rdg, i))
1284 ++res;
1287 return res;
1290 /* Returns the number of read and write operations in a PARTITION of
1291 the RDG. */
1293 static int
1294 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1296 int res = 0;
1297 unsigned i;
1298 bitmap_iterator ii;
1300 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1302 if (RDG_MEM_WRITE_STMT (rdg, i))
1303 ++res;
1305 if (RDG_MEM_READS_STMT (rdg, i))
1306 ++res;
1309 return res;
1312 /* Returns true when one of the PARTITIONS contains all the read or
1313 write operations of RDG. */
1315 static bool
1316 partition_contains_all_rw (struct graph *rdg,
1317 vec<partition_t> partitions)
1319 int i;
1320 partition_t partition;
1321 int nrw = number_of_rw_in_rdg (rdg);
1323 FOR_EACH_VEC_ELT (partitions, i, partition)
1324 if (nrw == number_of_rw_in_partition (rdg, partition))
1325 return true;
1327 return false;
1330 /* Compute partition dependence created by the data references in DRS1
1331 and DRS2 and modify and return DIR according to that. */
1333 static int
1334 pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1335 vec<data_reference_p> drs1,
1336 vec<data_reference_p> drs2)
1338 data_reference_p dr1, dr2;
1340 /* dependence direction - 0 is no dependence, -1 is back,
1341 1 is forth, 2 is both (we can stop then, merging will occur). */
1342 for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1343 for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1345 int this_dir = 1;
1346 ddr_p ddr;
1347 /* Re-shuffle data-refs to be in dominator order. */
1348 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1349 > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1351 data_reference_p tem = dr1;
1352 dr1 = dr2;
1353 dr2 = tem;
1354 this_dir = -this_dir;
1356 ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1357 compute_affine_dependence (ddr, loops[0]);
1358 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1359 this_dir = 2;
1360 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1362 if (DDR_REVERSED_P (ddr))
1364 data_reference_p tem = dr1;
1365 dr1 = dr2;
1366 dr2 = tem;
1367 this_dir = -this_dir;
1369 /* Known dependences can still be unordered througout the
1370 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1371 if (DDR_NUM_DIST_VECTS (ddr) != 1)
1372 this_dir = 2;
1373 /* If the overlap is exact preserve stmt order. */
1374 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1376 else
1378 /* Else as the distance vector is lexicographic positive
1379 swap the dependence direction. */
1380 this_dir = -this_dir;
1383 else
1384 this_dir = 0;
1385 free_dependence_relation (ddr);
1386 if (dir == 0)
1387 dir = this_dir;
1388 else if (dir != this_dir)
1389 return 2;
1391 return dir;
1394 /* Compare postorder number of the partition graph vertices V1 and V2. */
1396 static int
1397 pgcmp (const void *v1_, const void *v2_)
1399 const vertex *v1 = (const vertex *)v1_;
1400 const vertex *v2 = (const vertex *)v2_;
1401 return v2->post - v1->post;
1404 /* Distributes the code from LOOP in such a way that producer
1405 statements are placed before consumer statements. Tries to separate
1406 only the statements from STMTS into separate loops.
1407 Returns the number of distributed loops. */
1409 static int
1410 distribute_loop (struct loop *loop, vec<gimple> stmts,
1411 control_dependences *cd, int *nb_calls)
1413 struct graph *rdg;
1414 partition_t partition;
1415 bool any_builtin;
1416 int i, nbp;
1417 graph *pg = NULL;
1418 int num_sccs = 1;
1420 *nb_calls = 0;
1421 auto_vec<loop_p, 3> loop_nest;
1422 if (!find_loop_nest (loop, &loop_nest))
1423 return 0;
1425 rdg = build_rdg (loop_nest, cd);
1426 if (!rdg)
1428 if (dump_file && (dump_flags & TDF_DETAILS))
1429 fprintf (dump_file,
1430 "Loop %d not distributed: failed to build the RDG.\n",
1431 loop->num);
1433 return 0;
1436 if (dump_file && (dump_flags & TDF_DETAILS))
1437 dump_rdg (dump_file, rdg);
1439 auto_vec<partition_t, 3> partitions;
1440 rdg_build_partitions (rdg, stmts, &partitions);
1442 any_builtin = false;
1443 FOR_EACH_VEC_ELT (partitions, i, partition)
1445 classify_partition (loop, rdg, partition);
1446 any_builtin |= partition_builtin_p (partition);
1449 /* If we are only distributing patterns but did not detect any,
1450 simply bail out. */
1451 if (!flag_tree_loop_distribution
1452 && !any_builtin)
1454 nbp = 0;
1455 goto ldist_done;
1458 /* If we are only distributing patterns fuse all partitions that
1459 were not classified as builtins. This also avoids chopping
1460 a loop into pieces, separated by builtin calls. That is, we
1461 only want no or a single loop body remaining. */
1462 partition_t into;
1463 if (!flag_tree_loop_distribution)
1465 for (i = 0; partitions.iterate (i, &into); ++i)
1466 if (!partition_builtin_p (into))
1467 break;
1468 for (++i; partitions.iterate (i, &partition); ++i)
1469 if (!partition_builtin_p (partition))
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1473 fprintf (dump_file, "fusing non-builtin partitions\n");
1474 dump_bitmap (dump_file, into->stmts);
1475 dump_bitmap (dump_file, partition->stmts);
1477 partition_merge_into (into, partition);
1478 partitions.unordered_remove (i);
1479 partition_free (partition);
1480 i--;
1484 /* Due to limitations in the transform phase we have to fuse all
1485 reduction partitions into the last partition so the existing
1486 loop will contain all loop-closed PHI nodes. */
1487 for (i = 0; partitions.iterate (i, &into); ++i)
1488 if (partition_reduction_p (into))
1489 break;
1490 for (i = i + 1; partitions.iterate (i, &partition); ++i)
1491 if (partition_reduction_p (partition))
1493 if (dump_file && (dump_flags & TDF_DETAILS))
1495 fprintf (dump_file, "fusing partitions\n");
1496 dump_bitmap (dump_file, into->stmts);
1497 dump_bitmap (dump_file, partition->stmts);
1498 fprintf (dump_file, "because they have reductions\n");
1500 partition_merge_into (into, partition);
1501 partitions.unordered_remove (i);
1502 partition_free (partition);
1503 i--;
1506 /* Apply our simple cost model - fuse partitions with similar
1507 memory accesses. */
1508 for (i = 0; partitions.iterate (i, &into); ++i)
1510 if (partition_builtin_p (into))
1511 continue;
1512 for (int j = i + 1;
1513 partitions.iterate (j, &partition); ++j)
1515 if (!partition_builtin_p (partition)
1516 && similar_memory_accesses (rdg, into, partition))
1518 if (dump_file && (dump_flags & TDF_DETAILS))
1520 fprintf (dump_file, "fusing partitions\n");
1521 dump_bitmap (dump_file, into->stmts);
1522 dump_bitmap (dump_file, partition->stmts);
1523 fprintf (dump_file, "because they have similar "
1524 "memory accesses\n");
1526 partition_merge_into (into, partition);
1527 partitions.unordered_remove (j);
1528 partition_free (partition);
1529 j--;
1534 /* Build the partition dependency graph. */
1535 if (partitions.length () > 1)
1537 pg = new_graph (partitions.length ());
1538 struct pgdata {
1539 partition_t partition;
1540 vec<data_reference_p> writes;
1541 vec<data_reference_p> reads;
1543 #define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1544 for (i = 0; partitions.iterate (i, &partition); ++i)
1546 vertex *v = &pg->vertices[i];
1547 pgdata *data = new pgdata;
1548 data_reference_p dr;
1549 /* FIXME - leaks. */
1550 v->data = data;
1551 bitmap_iterator bi;
1552 unsigned j;
1553 data->partition = partition;
1554 data->reads = vNULL;
1555 data->writes = vNULL;
1556 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1557 for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1558 if (DR_IS_READ (dr))
1559 data->reads.safe_push (dr);
1560 else
1561 data->writes.safe_push (dr);
1563 partition_t partition1, partition2;
1564 for (i = 0; partitions.iterate (i, &partition1); ++i)
1565 for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1567 /* dependence direction - 0 is no dependence, -1 is back,
1568 1 is forth, 2 is both (we can stop then, merging will occur). */
1569 int dir = 0;
1570 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1571 PGDATA(i)->writes,
1572 PGDATA(j)->reads);
1573 if (dir != 2)
1574 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1575 PGDATA(i)->reads,
1576 PGDATA(j)->writes);
1577 if (dir != 2)
1578 dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1579 PGDATA(i)->writes,
1580 PGDATA(j)->writes);
1581 if (dir == 1 || dir == 2)
1582 add_edge (pg, i, j);
1583 if (dir == -1 || dir == 2)
1584 add_edge (pg, j, i);
1587 /* Add edges to the reduction partition (if any) to force it last. */
1588 unsigned j;
1589 for (j = 0; partitions.iterate (j, &partition); ++j)
1590 if (partition_reduction_p (partition))
1591 break;
1592 if (j < partitions.length ())
1594 for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1595 if (i != j)
1596 add_edge (pg, i, j);
1599 /* Compute partitions we cannot separate and fuse them. */
1600 num_sccs = graphds_scc (pg, NULL);
1601 for (i = 0; i < num_sccs; ++i)
1603 partition_t first;
1604 int j;
1605 for (j = 0; partitions.iterate (j, &first); ++j)
1606 if (pg->vertices[j].component == i)
1607 break;
1608 for (j = j + 1; partitions.iterate (j, &partition); ++j)
1609 if (pg->vertices[j].component == i)
1611 if (dump_file && (dump_flags & TDF_DETAILS))
1613 fprintf (dump_file, "fusing partitions\n");
1614 dump_bitmap (dump_file, first->stmts);
1615 dump_bitmap (dump_file, partition->stmts);
1616 fprintf (dump_file, "because they are in the same "
1617 "dependence SCC\n");
1619 partition_merge_into (first, partition);
1620 partitions[j] = NULL;
1621 partition_free (partition);
1622 PGDATA (j)->partition = NULL;
1626 /* Now order the remaining nodes in postorder. */
1627 qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1628 partitions.truncate (0);
1629 for (i = 0; i < pg->n_vertices; ++i)
1631 pgdata *data = PGDATA (i);
1632 if (data->partition)
1633 partitions.safe_push (data->partition);
1634 data->reads.release ();
1635 data->writes.release ();
1636 delete data;
1638 gcc_assert (partitions.length () == (unsigned)num_sccs);
1639 free_graph (pg);
1642 nbp = partitions.length ();
1643 if (nbp == 0
1644 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1645 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1647 nbp = 0;
1648 goto ldist_done;
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1652 dump_rdg_partitions (dump_file, partitions);
1654 FOR_EACH_VEC_ELT (partitions, i, partition)
1656 if (partition_builtin_p (partition))
1657 (*nb_calls)++;
1658 generate_code_for_partition (loop, partition, i < nbp - 1);
1661 ldist_done:
1663 FOR_EACH_VEC_ELT (partitions, i, partition)
1664 partition_free (partition);
1666 free_rdg (rdg);
1667 return nbp - *nb_calls;
1670 /* Distribute all loops in the current function. */
1672 namespace {
1674 const pass_data pass_data_loop_distribution =
1676 GIMPLE_PASS, /* type */
1677 "ldist", /* name */
1678 OPTGROUP_LOOP, /* optinfo_flags */
1679 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1680 ( PROP_cfg | PROP_ssa ), /* properties_required */
1681 0, /* properties_provided */
1682 0, /* properties_destroyed */
1683 0, /* todo_flags_start */
1684 0, /* todo_flags_finish */
1687 class pass_loop_distribution : public gimple_opt_pass
1689 public:
1690 pass_loop_distribution (gcc::context *ctxt)
1691 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1694 /* opt_pass methods: */
1695 virtual bool gate (function *)
1697 return flag_tree_loop_distribution
1698 || flag_tree_loop_distribute_patterns;
1701 virtual unsigned int execute (function *);
1703 }; // class pass_loop_distribution
1705 unsigned int
1706 pass_loop_distribution::execute (function *fun)
1708 struct loop *loop;
1709 bool changed = false;
1710 basic_block bb;
1711 control_dependences *cd = NULL;
1713 FOR_ALL_BB_FN (bb, fun)
1715 gimple_stmt_iterator gsi;
1716 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 gimple_set_uid (gsi_stmt (gsi), -1);
1718 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1719 gimple_set_uid (gsi_stmt (gsi), -1);
1722 /* We can at the moment only distribute non-nested loops, thus restrict
1723 walking to innermost loops. */
1724 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1726 auto_vec<gimple> work_list;
1727 basic_block *bbs;
1728 int num = loop->num;
1729 unsigned int i;
1731 /* If the loop doesn't have a single exit we will fail anyway,
1732 so do that early. */
1733 if (!single_exit (loop))
1734 continue;
1736 /* Only optimize hot loops. */
1737 if (!optimize_loop_for_speed_p (loop))
1738 continue;
1740 /* Initialize the worklist with stmts we seed the partitions with. */
1741 bbs = get_loop_body_in_dom_order (loop);
1742 for (i = 0; i < loop->num_nodes; ++i)
1744 gimple_stmt_iterator gsi;
1745 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1747 gimple phi = gsi_stmt (gsi);
1748 if (virtual_operand_p (gimple_phi_result (phi)))
1749 continue;
1750 /* Distribute stmts which have defs that are used outside of
1751 the loop. */
1752 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1753 continue;
1754 work_list.safe_push (phi);
1756 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1758 gimple stmt = gsi_stmt (gsi);
1760 /* If there is a stmt with side-effects bail out - we
1761 cannot and should not distribute this loop. */
1762 if (gimple_has_side_effects (stmt))
1764 work_list.truncate (0);
1765 goto out;
1768 /* Distribute stmts which have defs that are used outside of
1769 the loop. */
1770 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1772 /* Otherwise only distribute stores for now. */
1773 else if (!gimple_vdef (stmt))
1774 continue;
1776 work_list.safe_push (stmt);
1779 out:
1780 free (bbs);
1782 int nb_generated_loops = 0;
1783 int nb_generated_calls = 0;
1784 location_t loc = find_loop_location (loop);
1785 if (work_list.length () > 0)
1787 if (!cd)
1789 calculate_dominance_info (CDI_DOMINATORS);
1790 calculate_dominance_info (CDI_POST_DOMINATORS);
1791 cd = new control_dependences (create_edge_list ());
1792 free_dominance_info (CDI_POST_DOMINATORS);
1794 nb_generated_loops = distribute_loop (loop, work_list, cd,
1795 &nb_generated_calls);
1798 if (nb_generated_loops + nb_generated_calls > 0)
1800 changed = true;
1801 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1802 loc, "Loop %d distributed: split to %d loops "
1803 "and %d library calls.\n",
1804 num, nb_generated_loops, nb_generated_calls);
1806 else if (dump_file && (dump_flags & TDF_DETAILS))
1807 fprintf (dump_file, "Loop %d is the same.\n", num);
1810 if (cd)
1811 delete cd;
1813 if (changed)
1815 mark_virtual_operands_for_renaming (fun);
1816 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1819 #ifdef ENABLE_CHECKING
1820 verify_loop_structure ();
1821 #endif
1823 return 0;
1826 } // anon namespace
1828 gimple_opt_pass *
1829 make_pass_loop_distribution (gcc::context *ctxt)
1831 return new pass_loop_distribution (ctxt);