/* Loop distribution.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
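
/* In C terms, an illustrative sketch (not part of this file) of the
   same transformation:

     for (i = 1; i < n; i++)
       {
         a[i] = b[i] + c;
         d[i] = a[i - 1] * e;
       }

   becomes

     for (i = 1; i < n; i++)
       a[i] = b[i] + c;

     for (i = 1; i < n; i++)
       d[i] = a[i - 1] * e;

   after which each loop can be optimized or parallelized on its
   own.  */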

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"

enum partition_kind {
    PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bool has_writes;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
} *partition_t;

/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->has_writes = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */
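/* Note: this relies on the ordering of enum partition_kind; the only
   kinds greater than PKIND_REDUCTION are PKIND_MEMSET and
   PKIND_MEMCPY.  */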

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind > PKIND_REDUCTION;
}

/* Returns true if the partition contains memory writes.  */

static bool
partition_has_writes (partition_t partition)
{
  return partition->has_writes;
}

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm, so as to avoid
   including the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */
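/* For example, an induction variable whose final value is read after
   the loop creates such a dependence.  */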

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (x = 0, i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          if (!bitmap_bit_p (partition->stmts, x++))
            reset_debug_uses (gsi_stmt (bsi));

        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
          {
            gimple stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, x++))
              reset_debug_uses (stmt);
          }
      }

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition->stmts, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (virtual_operand_p (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */
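/* The size in bytes is NB_ITER scaled by the size of one element of
   DR; e.g. (illustrative) for an array of 4-byte ints and NB_ITER
   equal to N this builds the equivalent of N * 4.  */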

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
{
  tree size;
  size = fold_build2_loc (loc, MULT_EXPR, sizetype,
                          fold_convert_loc (loc, sizetype, nb_iter),
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  return fold_convert_loc (loc, size_type_node, size);
}

/* Build an address argument for a memory operation call.  */
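/* For a positive step this is simply BASE + OFFSET + INIT.  For a
   negative step the loop walks the memory region downwards, so the
   start of the region is NB_BYTES below the initial address plus one
   element; the adjustment below accounts for that.  */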

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */
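/* For instance (an illustrative sketch), a loop of the form

     for (i = 0; i < n; i++)
       a[i] = c;

   where all bytes of the representation of C are equal is replaced by

     memset (a, c, n * sizeof (*a));

   emitted in the loop preheader.  */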

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */
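/* For instance (an illustrative sketch), a loop of the form

     for (i = 0; i < n; i++)
       a[i] = b[i];

   is replaced by

     memcpy (a, b, n * sizeof (*a));

   or by a call to memmove when the source and destination may
   overlap.  */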

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree nb_iter, dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);
  if (gimple_bb (stmt) == loop->latch)
    nb_iter = number_of_latch_executions (loop);
  else
    nb_iter = number_of_exit_cond_executions (loop);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple phi = gsi_stmt (gsi);
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      /* If this is the last partition for which we generate code, we have
         to destroy the loop.  */
      if (!copy_p)
        destroy_loop (loop);
      break;

    case PKIND_REDUCTION:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!copy_p);
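      /* Fall through.  */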
    case PKIND_NORMAL:
      generate_loops_for_partition (loop, partition, copy_p);
      break;

    default:
      gcc_unreachable ();
    }
}

/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence or output-dependence
   among the successors of vertex V, otherwise returns the edge with the
   dependency.  */

static struct graph_edge *
has_anti_or_output_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd
          || RDGE_TYPE (e) == output_dd)
        return e;

  return NULL;
}

/* Returns true when one of the predecessors of V is marked in
   UPSTREAM_MEM_WRITES, ignoring flow channels (a write to memory
   followed by a read).  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow the split of
             the RDG in different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        vec<int> nodes;
        nodes.create (3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        FOR_EACH_VEC_ELT (nodes, i, x)
          {
            if (!bitmap_set_bit (seen, x))
              continue;

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti dependences the read should occur before
                   the write, this is why both the read and the write
                   should be placed in the same partition.  In output
                   dependences the order of the writes needs to be
                   preserved.  */
                || has_anti_or_output_dependence (&(rdg->vertices[x])))
              bitmap_set_bit (upstream_mem_writes, x);
          }

        nodes.release ();
      }
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
                                           bitmap, bitmap);

/* Flag the uses of U, stopping the traversal according to the
   information in upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
               bitmap processed)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_or_output_dependence (x);

  /* Keep in the same partition the destination of an antidependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (use))
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed);
            }
        }
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
{
  struct loop *loop;

  if (!bitmap_set_bit (partition->stmts, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      partition->has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
                               bitmap loops, bitmap processed)
{
  unsigned i;
  vec<int> nodes;
  nodes.create (3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops);

  FOR_EACH_VEC_ELT (nodes, i, x)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);

  nodes.release ();
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
{
  unsigned i;
  edge e;
  vec<edge> exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, e)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        conds->safe_push (cond);
    }

  exits.release ();
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
                     bitmap processed)
{
  unsigned i;
  bitmap_iterator bi;
  vec<gimple> conds;
  conds.create (3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (cfun, i), &conds);

  while (!conds.is_empty ())
    {
      gimple cond = conds.pop ();
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (bitmap_set_bit (loops, i))
          collect_condition_stmts (get_loop (cfun, i), &conds);

      BITMAP_FREE (new_loops);
    }

  conds.release ();
}

/* Returns a partition whose STMTS bitmap flags all the statements
   needed for computing the strongly connected component C of the RDG,
   also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_component (struct graph *rdg, rdgc c)
{
  int i, v;
  partition_t partition = partition_alloc (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  FOR_EACH_VEC_ELT (c->vertices, i, v)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);

  rdg_flag_loop_exits (rdg, loops, partition, processed);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (vec<rdgc> components)
{
  int i;
  rdgc x;

  FOR_EACH_VEC_ELT (components, i, x)
    {
      x->vertices.release ();
      free (x);
    }

  components.release ();
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
                      vec<rdgc> *components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  /* ??? Macros cannot process template types with more than one
     argument, so we need this typedef.  */
  typedef vec<int> vec_int_heap;
  vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i].create (3);

  for (i = 0; i < rdg->n_vertices; i++)
    all_components[rdg->vertices[i].component].safe_push (i);

  FOR_EACH_VEC_ELT (starting_vertices, i, v)
    {
      int c = rdg->vertices[v].component;

      if (bitmap_set_bit (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          components->safe_push (x);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      all_components[i].release ();

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP.  We currently detect the memset and memcpy/memmove
   patterns; everything else stays PKIND_NORMAL or PKIND_REDUCTION.  */
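/* A partition is disqualified from builtin generation when it
   contains volatile accesses, when the number of executions of the
   exit condition cannot be computed, or when its memory accesses are
   not exactly one store (memset) or one load plus one store
   (memcpy/memmove).  */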

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop, fail.
         ??? If the stmt is generated in another partition that
         is not created as builtin we can ignore this.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "not generating builtin, partition has "
                     "scalar uses outside of the loop\n");
          partition->kind = PKIND_REDUCTION;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  nb_iter = number_of_exit_cond_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */
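/* Partitions found to be similar here are fused again by ldist_gen,
   so that accesses to the same memory end up in the same loop.  */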

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
                         partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores.  Common loads are the situation
     that happens most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
                      vec<int> *other_stores,
                      vec<partition_t> *partitions, bitmap processed)
{
  int i;
  rdgc x;
  partition_t partition = partition_alloc (NULL);

  FOR_EACH_VEC_ELT (components, i, x)
    {
      partition_t np;
      int v = x->vertices[0];

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x);
      bitmap_ior_into (partition->stmts, np->stmts);
      partition->has_writes = partition_has_writes (np);
      bitmap_ior_into (processed, np->stmts);
      partition_free (np);

      if (partition_has_writes (partition))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition->stmts);
            }

          partitions->safe_push (partition);
          partition = partition_alloc (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      other_stores->safe_push (i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (other_stores->length () > 0)
    {
      vec<rdgc> comps;
      comps.create (3);
      vec<int> foo;
      foo.create (3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      foo.release ();
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition->stmts) > 0)
    partitions->safe_push (partition);
  else
    partition_free (partition);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t>);

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           vec<int> starting_vertices)
{
  int i, nbp;
  vec<rdgc> components;
  components.create (3);
  vec<partition_t> partitions;
  partitions.create (3);
  vec<int> other_stores;
  other_stores.create (3);
  partition_t partition;
  bitmap processed = BITMAP_ALLOC (NULL);
  bool any_builtin;

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          FOR_EACH_VEC_ELT (starting_vertices, j, v)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            other_stores.safe_push (i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns, fuse all partitions that
     were not properly classified as builtins.  Else fuse partitions
     with similar memory accesses.  */
  if (!flag_tree_loop_distribution)
    {
      partition_t into;
      /* If we did not detect any builtin simply bail out.  */
      if (!any_builtin)
        {
          nbp = 0;
          goto ldist_done;
        }
      /* Only fuse adjacent non-builtin partitions, see PR53616.
         ??? Use dependence information to improve partition ordering.  */
      i = 0;
      do
        {
          for (; partitions.iterate (i, &into); ++i)
            if (!partition_builtin_p (into))
              break;
          for (++i; partitions.iterate (i, &partition); ++i)
            if (!partition_builtin_p (partition))
              {
                bitmap_ior_into (into->stmts, partition->stmts);
                if (partition->kind == PKIND_REDUCTION)
                  into->kind = PKIND_REDUCTION;
                partitions.ordered_remove (i);
                partition_free (partition);
                i--;
              }
            else
              break;
        }
      while ((unsigned) i < partitions.length ());
    }
  else
    {
      partition_t into;
      int j;
      for (i = 0; partitions.iterate (i, &into); ++i)
        {
          if (partition_builtin_p (into))
            continue;
          for (j = i + 1;
               partitions.iterate (j, &partition); ++j)
            {
              if (!partition_builtin_p (partition)
                  /* ??? The following is horribly inefficient,
                     we are re-computing and analyzing data-references
                     of the stmts in the partitions all the time.  */
                  && similar_memory_accesses (rdg, into, partition))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "fusing partitions\n");
                      dump_bitmap (dump_file, into->stmts);
                      dump_bitmap (dump_file, partition->stmts);
                      fprintf (dump_file, "because they have similar "
                               "memory accesses\n");
                    }
                  bitmap_ior_into (into->stmts, partition->stmts);
                  if (partition->kind == PKIND_REDUCTION)
                    into->kind = PKIND_REDUCTION;
                  partitions.ordered_remove (j);
                  partition_free (partition);
                  j--;
                }
            }
        }
    }

  /* Fuse all reduction partitions into the last.  */
  if (partitions.length () > 1)
    {
      partition_t into = partitions.last ();
      for (i = partitions.length () - 2; i >= 0; --i)
        {
          partition_t what = partitions[i];
          if (what->kind == PKIND_REDUCTION)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, what->stmts);
                  fprintf (dump_file, "because the latter has reductions\n");
                }
              bitmap_ior_into (into->stmts, what->stmts);
              into->kind = PKIND_REDUCTION;
              partitions.ordered_remove (i);
              partition_free (what);
            }
        }
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    generate_code_for_partition (loop, partition, i < nbp - 1);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  other_stores.release ();
  partitions.release ();
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; otherwise tries to
   separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  vec<int> vertices;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  vec<loop_p> loop_nest;

  datarefs.create (10);
  dependence_relations.create (100);
  loop_nest.create (3);
  rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
      loop_nest.release ();
      return res;
    }

  vertices.create (3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  FOR_EACH_VEC_ELT (stmts, i, s)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          vertices.safe_push (v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  vertices.release ();
  free_rdg (rdg);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  loop_nest.release ();
  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  bool changed = false;
  basic_block bb;

  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      vec<gimple> work_list = vNULL;
      basic_block *bbs;
      int num = loop->num;
      int nb_generated_loops = 0;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Only distribute loops with a header and latch for now.  */
      if (loop->num_nodes > 2)
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          gimple_stmt_iterator gsi;
          for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_assign_single_p (stmt)
                       || is_gimple_reg (gimple_assign_lhs (stmt)))
                continue;

              work_list.safe_push (stmt);
            }
        }
      free (bbs);

      if (work_list.length () > 0)
        nb_generated_loops = distribute_loop (loop, work_list);

      if (nb_generated_loops > 0)
        changed = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", num);
        }

      work_list.release ();
    }

  if (changed)
    {
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution
    || flag_tree_loop_distribute_patterns;
}

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}