/* Loop distribution.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |  A(I) = B(I) + C
   |  D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |  A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |  D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
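/* For illustration, the same transformation in C (a sketch only, using
   hypothetical arrays a, b, d and scalars c, e; this is not code the
   pass contains or emits verbatim):

     for (i = 2; i <= n; i++)
       {
	 a[i] = b[i] + c;
	 d[i] = a[i-1] * e;
       }

   becomes

     for (i = 2; i <= n; i++)
       a[i] = b[i] + c;
     for (i = 2; i <= n; i++)
       d[i] = a[i-1] * e;

   after which each loop is free of loop-carried dependences of its own
   and can be executed as a parallel (DOALL) loop.  */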
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
/* Creates the scalar flow edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    if (flow_bb_inside_loop_p (loop, e->src))
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};
/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */
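/* Note that membership of a statement in PARTITION is tested below via
   gimple_uid, which create_rdg_vertices assigned in stmts_from_loop
   order; see the comment above stmts_from_loop for why the loop copy
   sees the same numbering.  */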
static void
generate_loops_for_partition (struct loop *loop, partition *partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple *stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
/* Build the size argument for a memory operation call: the byte count
   is NB_ITER (plus one if PLUS_ONE is set) times the size of the
   element referenced by DR.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (real_zerop (val))
    {
      /* Only return 0 for +0.0, not for -0.0, which doesn't have
	 an all bytes same memory representation.  Don't transform
	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
      switch (TREE_CODE (val))
	{
	case REAL_CST:
	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
	    return 0;
	  break;
	case COMPLEX_CST:
	  if (!const_with_all_bytes_same (TREE_REALPART (val))
	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
	    return 0;
	  break;
	case VECTOR_CST:
	  unsigned int j;
	  for (j = 0; j < VECTOR_CST_NELTS (val); ++j)
	    if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j)))
	      break;
	  if (j == VECTOR_CST_NELTS (val))
	    return 0;
	  break;
	default:
	  break;
	}
    }

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
			     partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect only the memset and memcpy/memmove patterns.  */
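/* As a sketch of the patterns meant here (hypothetical arrays a and b;
   not code this file contains): a partition equivalent to

     for (i = 0; i < n; ++i)
       a[i] = 0;

   is classified PKIND_MEMSET and later becomes
   memset (a, 0, n * sizeof *a), while

     for (i = 0; i < n; ++i)
       a[i] = b[i];

   is classified PKIND_MEMCPY and becomes a memcpy or memmove call,
   subject to the adjacency, dominance and dependence checks below.  */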
static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
	 of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }

	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
			 partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple *> starting_stmts,
		      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}
/* Compute the partition dependence created by the data references in
   DRS1 and DRS2, merge it into DIR and return the result.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
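  /* For example, if every dependence found between DRS1 and DRS2 points
     from the dominating statement to the dominated one, DIR stays 1
     (forth); if some dependence also points the other way, the result
     becomes 2 and the caller has to merge the two partitions.  */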
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
/* Compare postorder number of the partition graph vertices V1 and V2,
   ordering vertices with a higher postorder number first, i.e. in
   topological order so that producers sort before consumers.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */
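/* In outline, a summary of the steps below rather than added behavior:
   build the RDG for the loop nest, seed partitions from STMTS, classify
   each partition as a builtin where possible, fuse partitions that have
   to stay together (non-builtins when only distributing patterns,
   reductions, partitions with similar memory accesses, dependence SCCs),
   order the remaining partitions topologically and generate code for
   each one.  */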
static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
		 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	      changed = true;
	    }
	}
      /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
	 accesses when 1 and 2 have similar accesses but not 0 and 1
	 then in the next iteration we will fail to consider merging
	 1 into 0,2.  So try again if we did any merging into 0.  */
      if (changed)
	i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  struct partition *partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  struct partition *first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences ();
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  bool destroy_p;
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls,
						&destroy_p);
	  if (destroy_p)
	    loops_to_be_destroyed.safe_push (loop);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
	 otherwise can end up referring to stale data in control dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
	destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}