From d583c97934c7af62fd9c8d741c41c742365e4dfd Mon Sep 17 00:00:00 2001 From: hubicka Date: Tue, 6 Nov 2012 16:22:45 +0000 Subject: [PATCH] * cfgloopanal.c (get_loop_hot_path): New function. * tree-ssa-lop-ivcanon.c (struct loop_size): Add CONSTANT_IV, NUM_NON_PURE_CALLS_ON_HOT_PATH, NUM_PURE_CALLS_ON_HOT_PATH, NUM_BRANCHES_ON_HOT_PATH. (tree_estimate_loop_size): Compute the new values. (try_unroll_loop_completely): Disable unrolling of loops with only calls or too many branches. (tree_unroll_loops_completely): Deal also with outer loops of hot loops. * cfgloop.h (get_loop_hot_path): Declare. * params.def (PARAM_MAX_PEEL_BRANCHES): New parameters. * invoke.texi (max-peel-branches): Document. * gcc.dg/tree-ssa/loop-1.c: Make to look like a good unroling candidate still. * gcc.dg/tree-ssa/loop-23.c: Likewise. * gcc.dg/tree-ssa/cunroll-1.c: Unrolling now happens early. * gcc.dg/tree-prof/unroll-1.c: Remove confused dg-options. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@193246 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/cfgloop.h | 1 + gcc/cfgloopanal.c | 33 ++++++ gcc/doc/invoke.texi | 3 + gcc/params.def | 5 + gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.dg/tree-prof/unroll-1.c | 1 - gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 9 +- gcc/testsuite/gcc.dg/tree-ssa/loop-23.c | 23 ++-- gcc/tree-ssa-loop-ivcanon.c | 171 ++++++++++++++++++++++++++---- 10 files changed, 218 insertions(+), 39 deletions(-) diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 94ad6372d28..5cd62b3d640 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -714,6 +714,7 @@ extern void doloop_optimize_loops (void); extern void move_loop_invariants (void); extern bool finite_loop_p (struct loop *); extern void scale_loop_profile (struct loop *loop, int scale, int iteration_bound); +extern VEC (basic_block, heap) * get_loop_hot_path (const struct loop *loop); /* Returns the outermost loop of the loop nest that contains LOOP.*/ static inline struct loop * diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index c3cf3edf9b9..ba7c2626635 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -483,3 +483,36 @@ single_likely_exit (struct loop *loop) VEC_free (edge, heap, exits); return found; } + + +/* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs + order against direction of edges from latch. Specially, if + header != latch, latch is the 1-st block. */ + +VEC (basic_block, heap) * +get_loop_hot_path (const struct loop *loop) +{ + basic_block bb = loop->header; + VEC (basic_block, heap) *path = NULL; + bitmap visited = BITMAP_ALLOC (NULL); + + while (true) + { + edge_iterator ei; + edge e; + edge best = NULL; + + VEC_safe_push (basic_block, heap, path, bb); + bitmap_set_bit (visited, bb->index); + FOR_EACH_EDGE (e, ei, bb->succs) + if ((!best || e->probability > best->probability) + && !loop_exit_edge_p (loop, e) + && !bitmap_bit_p (visited, e->dest->index)) + best = e; + if (!best || best->dest == loop->header) + break; + bb = best->dest; + } + BITMAP_FREE (visited); + return path; +} diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1338e6e461c..73363a1a5a6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -9085,6 +9085,9 @@ the loop code is peeled. @item max-peel-times The maximum number of peelings of a single loop. +@item max-peel-branches +The maximum number of branches on the hot path through the peeled sequence. + @item max-completely-peeled-insns The maximum number of insns of a completely peeled loop. diff --git a/gcc/params.def b/gcc/params.def index 8733f1ba631..0ceb8a26263 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -291,6 +291,11 @@ DEFPARAM(PARAM_MAX_PEEL_TIMES, "max-peel-times", "The maximum number of peelings of a single loop", 16, 0, 0) +/* The maximum number of peelings of a single loop that is peeled completely. */ +DEFPARAM(PARAM_MAX_PEEL_BRANCHES, + "max-peel-branches", + "The maximum number of branches on the path through the peeled sequence", + 32, 0, 0) /* The maximum number of insns of a peeled loop. */ DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS, "max-completely-peeled-insns", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d5500aeb69b..be75f5374fd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2012-11-06 Jan Hubicka + + * gcc.dg/tree-ssa/loop-1.c: Make to look like a good unroling candidate still. + * gcc.dg/tree-ssa/loop-23.c: Likewise. + * gcc.dg/tree-ssa/cunroll-1.c: Unrolling now happens early. + * gcc.dg/tree-prof/unroll-1.c: Remove confused dg-options. + 2012-11-06 David Edelsohn * const-uniq-1.c: Expand regex to match AIX XCOFF labels. diff --git a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c index b1c6b8b2d39..0b6e2ea13a8 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c @@ -21,4 +21,3 @@ main() } /* { dg-final-use { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ /* { dg-final-use { cleanup-rtl-dump "Not unrolling loop, doesn't roll" } } */ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops" } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c index c302b17a86d..37d5ba10334 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -fdump-tree-cunrolli-details" } */ int a[2]; test(int c) { @@ -10,4 +10,4 @@ test(int c) /* Array bounds says the loop will not roll much. */ /* { dg-final { scan-tree-dump "Unrolled loop 1 completely .duplicated 1 times.." "cunroll"} } */ /* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunroll"} } */ -/* { dg-final { cleanup-tree-dump "cunroll" } } */ +/* { dg-final { cleanup-tree-dump "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index e4ad7a9fa7e..35ff0be60fa 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -17,13 +17,16 @@ to the load from the GOT this also contains the name of the funtion so for each call the function name would appear twice. */ /* { dg-options "-O1 -ftree-loop-ivcanon -funroll-loops -fdump-tree-ivcanon-details -fdump-tree-cunroll-details -fdump-tree-optimized -mno-relax-pic-calls" { target mips*-*-* } } */ - -void xxx(void) +__attribute__ ((pure)) +int foo (int x); +int xxx(void) { int x = 45; + int sum; while (x >>= 1) - foo (); + sum += foo (x) * 2; + return sum; } /* We should be able to find out that the loop iterates four times and unroll it completely. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c index a16dc5f0357..466d1758d1f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c @@ -1,24 +1,27 @@ /* { dg-do compile } */ /* { dg-options "-O2 -funroll-loops -fdump-tree-cunroll-details" } */ -void bla(int); +__attribute__ ((pure)) +int bla(int); -void foo(void) +int foo(void) { int i; + int sum; /* This loop used to appear to be too large for unrolling. */ for (i = 0; i < 4; i++) { - bla (i); - bla (2*i); - bla (3*i); - bla (4*i); - bla (5*i); - bla (6*i); - bla (7*i); - bla (8*i); + sum += bla (i); + sum += bla (2*i); + sum += bla (3*i); + sum += bla (4*i); + sum += bla (5*i); + sum += bla (6*i); + sum += bla (7*i); + sum += bla (8*i); } + return sum; } /* { dg-final { scan-tree-dump-times "Unrolled loop 1 completely" 1 "cunroll" } } */ diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 58f45d2d1c4..601223b3dda 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -140,6 +140,20 @@ struct loop_size instructions after exit are not executed. */ int last_iteration; int last_iteration_eliminated_by_peeling; + + /* If some IV computation will become constant. */ + bool constant_iv; + + /* Number of call stmts that are not a builtin and are pure or const + present on the hot path. */ + int num_pure_calls_on_hot_path; + /* Number of call stmts that are not a builtin and are not pure nor const + present on the hot path. */ + int num_non_pure_calls_on_hot_path; + /* Number of statements other than calls in the loop. */ + int non_call_stmts_on_hot_path; + /* Number of branches seen on the hot path. */ + int num_branches_on_hot_path; }; /* Return true if OP in STMT will be constant after peeling LOOP. */ @@ -188,7 +202,11 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop) return true; } -/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. +/* Computes an estimated number of insns in LOOP. + EXIT (if non-NULL) is an exite edge that will be eliminated in all but last + iteration of the loop. + EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration + of loop. Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. */ static void @@ -198,11 +216,17 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru gimple_stmt_iterator gsi; unsigned int i; bool after_exit; + VEC (basic_block, heap) *path = get_loop_hot_path (loop); size->overall = 0; size->eliminated_by_peeling = 0; size->last_iteration = 0; size->last_iteration_eliminated_by_peeling = 0; + size->num_pure_calls_on_hot_path = 0; + size->num_non_pure_calls_on_hot_path = 0; + size->non_call_stmts_on_hot_path = 0; + size->num_branches_on_hot_path = 0; + size->constant_iv = 0; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); @@ -221,6 +245,8 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru gimple stmt = gsi_stmt (gsi); int num = estimate_num_insns (stmt, &eni_size_weights); bool likely_eliminated = false; + bool likely_eliminated_last = false; + bool likely_eliminated_peeled = false; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -231,11 +257,21 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru /* Look for reasons why we might optimize this stmt away. */ /* Exit conditional. */ - if (exit && body[i] == exit->src && stmt == last_stmt (exit->src)) + if (exit && body[i] == exit->src + && stmt == last_stmt (exit->src)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Exit condition will be eliminated.\n"); - likely_eliminated = true; + fprintf (dump_file, " Exit condition will be eliminated " + "in peeled copies.\n"); + likely_eliminated_peeled = true; + } + else if (edge_to_cancel && body[i] == edge_to_cancel->src + && stmt == last_stmt (edge_to_cancel->src)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Exit condition will be eliminated " + "in last copy.\n"); + likely_eliminated_last = true; } /* Sets of IV variables */ else if (gimple_code (stmt) == GIMPLE_ASSIGN @@ -249,19 +285,22 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru /* Assignments of IV variables. */ else if (gimple_code (stmt) == GIMPLE_ASSIGN && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME - && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop) + && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop) && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS || constant_after_peeling (gimple_assign_rhs2 (stmt), stmt, loop))) { + size->constant_iv = true; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Constant expression will be folded away.\n"); likely_eliminated = true; } /* Conditionals. */ - else if (gimple_code (stmt) == GIMPLE_COND - && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) - && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)) + else if ((gimple_code (stmt) == GIMPLE_COND + && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) + && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)) + || (gimple_code (stmt) == GIMPLE_SWITCH + && constant_after_peeling (gimple_switch_index (stmt), stmt, loop))) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Constant conditional.\n"); @@ -269,16 +308,49 @@ tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, stru } size->overall += num; - if (likely_eliminated) + if (likely_eliminated || likely_eliminated_peeled) size->eliminated_by_peeling += num; if (!after_exit) { size->last_iteration += num; - if (likely_eliminated) + if (likely_eliminated || likely_eliminated_last) size->last_iteration_eliminated_by_peeling += num; } } } + while (VEC_length (basic_block, path)) + { + basic_block bb = VEC_pop (basic_block, path); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_CALL) + { + int flags = gimple_call_flags (stmt); + tree decl = gimple_call_fndecl (stmt); + + if (decl && DECL_IS_BUILTIN (decl) + && is_inexpensive_builtin (decl)) + ; + else if (flags & (ECF_PURE | ECF_CONST)) + size->num_pure_calls_on_hot_path++; + else + size->num_non_pure_calls_on_hot_path++; + size->num_branches_on_hot_path ++; + } + else if (gimple_code (stmt) != GIMPLE_CALL + && gimple_code (stmt) != GIMPLE_DEBUG) + size->non_call_stmts_on_hot_path++; + if (((gimple_code (stmt) == GIMPLE_COND + && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop) + || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))) + || (gimple_code (stmt) == GIMPLE_SWITCH + && !constant_after_peeling (gimple_switch_index (stmt), stmt, loop))) + && (!exit || bb != exit->src)) + size->num_branches_on_hot_path++; + } + } + VEC_free (basic_block, heap, path); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall, size->eliminated_by_peeling, size->last_iteration, @@ -644,34 +716,85 @@ try_unroll_loop_completely (struct loop *loop, (int) unr_insns); } + /* If the code is going to shrink, we don't need to be extra cautious + on guessing if the unrolling is going to be profitable. */ + if (unr_insns + /* If there is IV variable that will become constant, we save + one instruction in the loop prologue we do not account + otherwise. */ + <= ninsns + (size.constant_iv != false)) + ; /* We unroll only inner loops, because we do not consider it profitable otheriwse. We still can cancel loopback edge of not rolling loop; this is always a good idea. */ - if (loop->inner && unr_insns > ninsns) + else if (ul == UL_NO_GROWTH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + loop->num); + return false; + } + /* Outer loops tend to be less interesting candidates for complette + unrolling unless we can do a lot of propagation into the inner loop + body. For now we disable outer loop unrolling when the code would + grow. */ + else if (loop->inner) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d:" + fprintf (dump_file, "Not unrolling loop %d: " "it is not innermost and code would grow.\n", loop->num); return false; } - - if (unr_insns > ninsns - && (unr_insns - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))) + /* If there is call on a hot path through the loop, then + there is most probably not much to optimize. */ + else if (size.num_non_pure_calls_on_hot_path) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d " - "(--param max-completely-peeled-insns limit reached).\n", + fprintf (dump_file, "Not unrolling loop %d: " + "contains call and code would grow.\n", loop->num); return false; } - - if (ul == UL_NO_GROWTH - && unr_insns > ninsns) + /* If there is pure/const call in the function, then we + can still optimize the unrolled loop body if it contains + some other interesting code than the calls and code + storing or cumulating the return value. */ + else if (size.num_pure_calls_on_hot_path + /* One IV increment, one test, one ivtmp store + and one usefull stmt. That is about minimal loop + doing pure call. */ + && (size.non_call_stmts_on_hot_path + <= 3 + size.num_pure_calls_on_hot_path)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + fprintf (dump_file, "Not unrolling loop %d: " + "contains just pure calls and code would grow.\n", + loop->num); + return false; + } + /* Complette unrolling is major win when control flow is removed and + one big basic block is created. If the loop contains control flow + the optimization may still be a win because of eliminating the loop + overhead but it also may blow the branch predictor tables. + Limit number of branches on the hot path through the peeled + sequence. */ + else if (size.num_branches_on_hot_path * (int)n_unroll + > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + " number of branches on hot path in the unrolled sequence" + " reach --param max-peel-branches limit.\n", + loop->num); + return false; + } + else if (unr_insns + > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "(--param max-completely-peeled-insns limit reached).\n", loop->num); return false; } @@ -689,6 +812,8 @@ try_unroll_loop_completely (struct loop *loop, { free_original_copy_tables (); free (wont_exit); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Failed to duplicate the loop\n"); return false; } @@ -968,7 +1093,7 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) { struct loop *loop_father = loop_outer (loop); - if (may_increase_size && optimize_loop_for_speed_p (loop) + if (may_increase_size && optimize_loop_nest_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do not cause code growth. */ && (unroll_outer || loop_outer (loop_father))) -- 2.11.4.GIT