1 /* Induction variable canonicalization and loop peeling.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass detects the loops that iterate a constant number of times,
21 adds a canonical induction variable (step -1, tested against 0)
22 and replaces the exit test. This enables the less powerful rtl
23 level analysis to use this information.
25 This might spoil the code in some cases (by increasing register pressure).
26 Note that in the case the new variable is not needed, ivopts will get rid
27 of it, so it might only be a problem when there are no other linear induction
28 variables. In that case the created optimization possibilities are likely
31 Additionally in case we detect that it is beneficial to unroll the
32 loop completely, we do it right here to expose the optimization
33 possibilities to the following passes. */
37 #include "coretypes.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-ssa-alias.h"
44 #include "internal-fn.h"
45 #include "gimple-fold.h"
47 #include "gimple-expr.h"
50 #include "gimple-iterator.h"
51 #include "gimple-ssa.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop-niter.h"
60 #include "tree-ssa-loop.h"
61 #include "tree-into-ssa.h"
63 #include "tree-pass.h"
64 #include "tree-chrec.h"
65 #include "tree-scalar-evolution.h"
68 #include "tree-inline.h"
70 #include "tree-cfgcleanup.h"
72 /* Specifies types of loops that may be unrolled. */
76 UL_SINGLE_ITER
, /* Only loops that exit immediately in the first
78 UL_NO_GROWTH
, /* Only loops whose unrolling will not cause increase
80 UL_ALL
/* All suitable loops. */
83 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
84 is the exit edge whose condition is replaced. */
87 create_canonical_iv (struct loop
*loop
, edge exit
, tree niter
)
92 gimple_stmt_iterator incr_at
;
95 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
97 fprintf (dump_file
, "Added canonical iv to loop %d, ", loop
->num
);
98 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
99 fprintf (dump_file
, " iterations.\n");
102 cond
= last_stmt (exit
->src
);
103 in
= EDGE_SUCC (exit
->src
, 0);
105 in
= EDGE_SUCC (exit
->src
, 1);
107 /* Note that we do not need to worry about overflows, since the
108 type of niter is always unsigned and all comparisons are
109 just for equality/nonequality -- i.e. everything works
110 in modulo arithmetic. */
112 type
= TREE_TYPE (niter
);
113 niter
= fold_build2 (PLUS_EXPR
, type
,
115 build_int_cst (type
, 1));
116 incr_at
= gsi_last_bb (in
->src
);
118 build_int_cst (type
, -1),
120 &incr_at
, false, NULL
, &var
);
122 cmp
= (exit
->flags
& EDGE_TRUE_VALUE
) ? EQ_EXPR
: NE_EXPR
;
123 gimple_cond_set_code (cond
, cmp
);
124 gimple_cond_set_lhs (cond
, var
);
125 gimple_cond_set_rhs (cond
, build_int_cst (type
, 0));
129 /* Describe size of loop as detected by tree_estimate_loop_size. */
132 /* Number of instructions in the loop. */
135 /* Number of instructions that will be likely optimized out in
136 peeled iterations of loop (i.e. computation based on induction
137 variable where induction variable starts at known constant.) */
138 int eliminated_by_peeling
;
140 /* Same statistics for last iteration of loop: it is smaller because
141 instructions after exit are not executed. */
143 int last_iteration_eliminated_by_peeling
;
145 /* If some IV computation will become constant. */
148 /* Number of call stmts that are not a builtin and are pure or const
149 present on the hot path. */
150 int num_pure_calls_on_hot_path
;
151 /* Number of call stmts that are not a builtin and are not pure nor const
152 present on the hot path. */
153 int num_non_pure_calls_on_hot_path
;
154 /* Number of statements other than calls in the loop. */
155 int non_call_stmts_on_hot_path
;
156 /* Number of branches seen on the hot path. */
157 int num_branches_on_hot_path
;
160 /* Return true if OP in STMT will be constant after peeling LOOP. */
163 constant_after_peeling (tree op
, gimple stmt
, struct loop
*loop
)
167 if (is_gimple_min_invariant (op
))
170 /* We can still fold accesses to constant arrays when index is known. */
171 if (TREE_CODE (op
) != SSA_NAME
)
175 /* First take a quick look to see whether there is a constant array inside. */
176 while (handled_component_p (base
))
177 base
= TREE_OPERAND (base
, 0);
179 && ctor_for_folding (base
) != error_mark_node
)
180 || CONSTANT_CLASS_P (base
))
182 /* If so, see if we understand all the indices. */
184 while (handled_component_p (base
))
186 if (TREE_CODE (base
) == ARRAY_REF
187 && !constant_after_peeling (TREE_OPERAND (base
, 1), stmt
, loop
))
189 base
= TREE_OPERAND (base
, 0);
196 /* Induction variables are constants. */
197 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false))
199 if (!is_gimple_min_invariant (iv
.base
))
201 if (!is_gimple_min_invariant (iv
.step
))
206 /* Computes an estimated number of insns in LOOP.
207 EXIT (if non-NULL) is an exit edge that will be eliminated in all but the last
208 iteration of the loop.
209 EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration
211 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
212 Stop estimating after UPPER_BOUND is met. Return true in this case. */
215 tree_estimate_loop_size (struct loop
*loop
, edge exit
, edge edge_to_cancel
, struct loop_size
*size
,
218 basic_block
*body
= get_loop_body (loop
);
219 gimple_stmt_iterator gsi
;
222 vec
<basic_block
> path
= get_loop_hot_path (loop
);
225 size
->eliminated_by_peeling
= 0;
226 size
->last_iteration
= 0;
227 size
->last_iteration_eliminated_by_peeling
= 0;
228 size
->num_pure_calls_on_hot_path
= 0;
229 size
->num_non_pure_calls_on_hot_path
= 0;
230 size
->non_call_stmts_on_hot_path
= 0;
231 size
->num_branches_on_hot_path
= 0;
232 size
->constant_iv
= 0;
234 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
235 fprintf (dump_file
, "Estimating sizes for loop %i\n", loop
->num
);
236 for (i
= 0; i
< loop
->num_nodes
; i
++)
238 if (edge_to_cancel
&& body
[i
] != edge_to_cancel
->src
239 && dominated_by_p (CDI_DOMINATORS
, body
[i
], edge_to_cancel
->src
))
243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
244 fprintf (dump_file
, " BB: %i, after_exit: %i\n", body
[i
]->index
, after_exit
);
246 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
248 gimple stmt
= gsi_stmt (gsi
);
249 int num
= estimate_num_insns (stmt
, &eni_size_weights
);
250 bool likely_eliminated
= false;
251 bool likely_eliminated_last
= false;
252 bool likely_eliminated_peeled
= false;
254 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
256 fprintf (dump_file
, " size: %3i ", num
);
257 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, 0);
260 /* Look for reasons why we might optimize this stmt away. */
262 if (gimple_has_side_effects (stmt
))
264 /* Exit conditional. */
265 else if (exit
&& body
[i
] == exit
->src
266 && stmt
== last_stmt (exit
->src
))
268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
269 fprintf (dump_file
, " Exit condition will be eliminated "
270 "in peeled copies.\n");
271 likely_eliminated_peeled
= true;
273 else if (edge_to_cancel
&& body
[i
] == edge_to_cancel
->src
274 && stmt
== last_stmt (edge_to_cancel
->src
))
276 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
277 fprintf (dump_file
, " Exit condition will be eliminated "
279 likely_eliminated_last
= true;
281 /* Sets of IV variables */
282 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
283 && constant_after_peeling (gimple_assign_lhs (stmt
), stmt
, loop
))
285 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
286 fprintf (dump_file
, " Induction variable computation will"
287 " be folded away.\n");
288 likely_eliminated
= true;
290 /* Assignments of IV variables. */
291 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
292 && TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
293 && constant_after_peeling (gimple_assign_rhs1 (stmt
), stmt
, loop
)
294 && (gimple_assign_rhs_class (stmt
) != GIMPLE_BINARY_RHS
295 || constant_after_peeling (gimple_assign_rhs2 (stmt
),
298 size
->constant_iv
= true;
299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
300 fprintf (dump_file
, " Constant expression will be folded away.\n");
301 likely_eliminated
= true;
304 else if ((gimple_code (stmt
) == GIMPLE_COND
305 && constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
306 && constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
))
307 || (gimple_code (stmt
) == GIMPLE_SWITCH
308 && constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
311 fprintf (dump_file
, " Constant conditional.\n");
312 likely_eliminated
= true;
315 size
->overall
+= num
;
316 if (likely_eliminated
|| likely_eliminated_peeled
)
317 size
->eliminated_by_peeling
+= num
;
320 size
->last_iteration
+= num
;
321 if (likely_eliminated
|| likely_eliminated_last
)
322 size
->last_iteration_eliminated_by_peeling
+= num
;
324 if ((size
->overall
* 3 / 2 - size
->eliminated_by_peeling
325 - size
->last_iteration_eliminated_by_peeling
) > upper_bound
)
333 while (path
.length ())
335 basic_block bb
= path
.pop ();
336 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
338 gimple stmt
= gsi_stmt (gsi
);
339 if (gimple_code (stmt
) == GIMPLE_CALL
)
341 int flags
= gimple_call_flags (stmt
);
342 tree decl
= gimple_call_fndecl (stmt
);
344 if (decl
&& DECL_IS_BUILTIN (decl
)
345 && is_inexpensive_builtin (decl
))
347 else if (flags
& (ECF_PURE
| ECF_CONST
))
348 size
->num_pure_calls_on_hot_path
++;
350 size
->num_non_pure_calls_on_hot_path
++;
351 size
->num_branches_on_hot_path
++;
353 else if (gimple_code (stmt
) != GIMPLE_CALL
354 && gimple_code (stmt
) != GIMPLE_DEBUG
)
355 size
->non_call_stmts_on_hot_path
++;
356 if (((gimple_code (stmt
) == GIMPLE_COND
357 && (!constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
358 || constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
)))
359 || (gimple_code (stmt
) == GIMPLE_SWITCH
360 && !constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
361 && (!exit
|| bb
!= exit
->src
))
362 size
->num_branches_on_hot_path
++;
366 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
367 fprintf (dump_file
, "size: %i-%i, last_iteration: %i-%i\n", size
->overall
,
368 size
->eliminated_by_peeling
, size
->last_iteration
,
369 size
->last_iteration_eliminated_by_peeling
);
375 /* Estimate number of insns of completely unrolled loop.
376 It is (NUNROLL + 1) * size of loop body with taking into account
377 the fact that in last copy everything after exit conditional
378 is dead and that some instructions will be eliminated after
381 Loop body is likely going to simplify further, this is difficult
382 to guess, we just decrease the result by 1/3. */
384 static unsigned HOST_WIDE_INT
385 estimated_unrolled_size (struct loop_size
*size
,
386 unsigned HOST_WIDE_INT nunroll
)
388 HOST_WIDE_INT unr_insns
= ((nunroll
)
389 * (HOST_WIDE_INT
) (size
->overall
390 - size
->eliminated_by_peeling
));
393 unr_insns
+= size
->last_iteration
- size
->last_iteration_eliminated_by_peeling
;
395 unr_insns
= unr_insns
* 2 / 3;
402 /* Loop LOOP is known to not loop. See if there is an edge in the loop
403 body that can be removed to make the loop always exit and at
404 the same time it does not make any code potentially executed
405 during the last iteration dead.
407 After complete unrolling we still may get rid of the conditional
408 on the exit in the last copy even if we have no idea what it does.
409 This is quite a common case for loops of the form
415 Here we prove the loop to iterate 5 times but we do not know
416 it from induction variable.
418 For now we handle only simple case where there is exit condition
419 just before the latch block and the latch block contains no statements
420 with side effect that may otherwise terminate the execution of loop
421 (such as by EH or by terminating the program or longjmp).
423 In the general case we may want to cancel the paths leading to statements
424 loop-niter identified as having undefined effect in the last iteration.
425 The other cases are hopefully rare and will be cleaned up later. */
428 loop_edge_to_cancel (struct loop
*loop
)
433 gimple_stmt_iterator gsi
;
435 /* We want only one predecessor of the loop latch. */
436 if (EDGE_COUNT (loop
->latch
->preds
) > 1)
439 exits
= get_loop_exit_edges (loop
);
441 FOR_EACH_VEC_ELT (exits
, i
, edge_to_cancel
)
443 /* Find the edge other than the loop exit
444 leaving the conditional. */
445 if (EDGE_COUNT (edge_to_cancel
->src
->succs
) != 2)
447 if (EDGE_SUCC (edge_to_cancel
->src
, 0) == edge_to_cancel
)
448 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 1);
450 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 0);
452 /* We only can handle conditionals. */
453 if (!(edge_to_cancel
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
456 /* We should never have conditionals in the loop latch. */
457 gcc_assert (edge_to_cancel
->dest
!= loop
->header
);
459 /* Check that it leads to loop latch. */
460 if (edge_to_cancel
->dest
!= loop
->latch
)
465 /* Verify that the code in loop latch does nothing that may end program
466 execution without really reaching the exit. This may include
467 non-pure/const function calls, EH statements, volatile ASMs etc. */
468 for (gsi
= gsi_start_bb (loop
->latch
); !gsi_end_p (gsi
); gsi_next (&gsi
))
469 if (gimple_has_side_effects (gsi_stmt (gsi
)))
471 return edge_to_cancel
;
477 /* Remove all tests for exits that are known to be taken after LOOP was
478 peeled NPEELED times. Put __builtin_unreachable before every statement
479 known to not be executed. */
482 remove_exits_and_undefined_stmts (struct loop
*loop
, unsigned int npeeled
)
484 struct nb_iter_bound
*elt
;
485 bool changed
= false;
487 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
489 /* If statement is known to be undefined after peeling, turn it
490 into unreachable (or trap when debugging experience is supposed
493 && elt
->bound
.ult (double_int::from_uhwi (npeeled
)))
495 gimple_stmt_iterator gsi
= gsi_for_stmt (elt
->stmt
);
496 gimple stmt
= gimple_build_call
497 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
499 gimple_set_location (stmt
, gimple_location (elt
->stmt
));
500 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
504 fprintf (dump_file
, "Forced statement unreachable: ");
505 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
508 /* If we know the exit will be taken after peeling, update. */
509 else if (elt
->is_exit
510 && elt
->bound
.ule (double_int::from_uhwi (npeeled
)))
512 basic_block bb
= gimple_bb (elt
->stmt
);
513 edge exit_edge
= EDGE_SUCC (bb
, 0);
515 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
517 fprintf (dump_file
, "Forced exit to be taken: ");
518 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
520 if (!loop_exit_edge_p (loop
, exit_edge
))
521 exit_edge
= EDGE_SUCC (bb
, 1);
522 gcc_checking_assert (loop_exit_edge_p (loop
, exit_edge
));
523 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
524 gimple_cond_make_true (elt
->stmt
);
526 gimple_cond_make_false (elt
->stmt
);
527 update_stmt (elt
->stmt
);
534 /* Remove all exits that are known to be never taken because of the loop bound
538 remove_redundant_iv_tests (struct loop
*loop
)
540 struct nb_iter_bound
*elt
;
541 bool changed
= false;
543 if (!loop
->any_upper_bound
)
545 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
547 /* Exit is pointless if it won't be taken before loop reaches
549 if (elt
->is_exit
&& loop
->any_upper_bound
550 && loop
->nb_iterations_upper_bound
.ult (elt
->bound
))
552 basic_block bb
= gimple_bb (elt
->stmt
);
553 edge exit_edge
= EDGE_SUCC (bb
, 0);
554 struct tree_niter_desc niter
;
556 if (!loop_exit_edge_p (loop
, exit_edge
))
557 exit_edge
= EDGE_SUCC (bb
, 1);
559 /* Only when we know the actual number of iterations, not
560 just a bound, we can remove the exit. */
561 if (!number_of_iterations_exit (loop
, exit_edge
,
562 &niter
, false, false)
563 || !integer_onep (niter
.assumptions
)
564 || !integer_zerop (niter
.may_be_zero
)
566 || TREE_CODE (niter
.niter
) != INTEGER_CST
567 || !loop
->nb_iterations_upper_bound
.ult
568 (tree_to_double_int (niter
.niter
)))
571 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
573 fprintf (dump_file
, "Removed pointless exit: ");
574 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
576 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
577 gimple_cond_make_false (elt
->stmt
);
579 gimple_cond_make_true (elt
->stmt
);
580 update_stmt (elt
->stmt
);
587 /* Stores loops that will be unlooped after we process whole loop tree. */
588 static vec
<loop_p
> loops_to_unloop
;
589 static vec
<int> loops_to_unloop_nunroll
;
591 /* Cancel all fully unrolled loops by putting __builtin_unreachable
593 We do it after all unrolling since unlooping moves basic blocks
594 across loop boundaries trashing loop closed SSA form as well
595 as SCEV info needed to be intact during unrolling.
597 IRRED_INVALIDATED is used to bookkeep if information about
598 irreducible regions may become invalid as a result
599 of the transformation.
600 LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
601 when we need to go into loop closed SSA form. */
604 unloop_loops (bitmap loop_closed_ssa_invalidated
,
605 bool *irred_invalidated
)
607 while (loops_to_unloop
.length ())
609 struct loop
*loop
= loops_to_unloop
.pop ();
610 int n_unroll
= loops_to_unloop_nunroll
.pop ();
611 basic_block latch
= loop
->latch
;
612 edge latch_edge
= loop_latch_edge (loop
);
613 int flags
= latch_edge
->flags
;
614 location_t locus
= latch_edge
->goto_locus
;
616 gimple_stmt_iterator gsi
;
618 remove_exits_and_undefined_stmts (loop
, n_unroll
);
620 /* Unloop destroys the latch edge. */
621 unloop (loop
, irred_invalidated
, loop_closed_ssa_invalidated
);
623 /* Create new basic block for the latch edge destination and wire
625 stmt
= gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
626 latch_edge
= make_edge (latch
, create_basic_block (NULL
, NULL
, latch
), flags
);
627 latch_edge
->probability
= 0;
628 latch_edge
->count
= 0;
629 latch_edge
->flags
|= flags
;
630 latch_edge
->goto_locus
= locus
;
632 latch_edge
->dest
->loop_father
= current_loops
->tree_root
;
633 latch_edge
->dest
->count
= 0;
634 latch_edge
->dest
->frequency
= 0;
635 set_immediate_dominator (CDI_DOMINATORS
, latch_edge
->dest
, latch_edge
->src
);
637 gsi
= gsi_start_bb (latch_edge
->dest
);
638 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
640 loops_to_unloop
.release ();
641 loops_to_unloop_nunroll
.release ();
644 /* Tries to unroll LOOP completely, i.e. NITER times.
645 UL determines which loops we are allowed to unroll.
646 EXIT is the exit of the loop that should be eliminated.
647 MAXITER specifies a bound on the number of iterations, -1 if it is
648 not known or too large for HOST_WIDE_INT. The location
649 LOCUS corresponding to the loop is used when emitting
650 a summary of the unroll to the dump file. */
653 try_unroll_loop_completely (struct loop
*loop
,
654 edge exit
, tree niter
,
655 enum unroll_level ul
,
656 HOST_WIDE_INT maxiter
,
659 unsigned HOST_WIDE_INT n_unroll
, ninsns
, max_unroll
, unr_insns
;
661 struct loop_size size
;
662 bool n_unroll_found
= false;
663 edge edge_to_cancel
= NULL
;
665 /* See if we proved number of iterations to be low constant.
667 EXIT is an edge that will be removed in all but last iteration of
670 EDGE_TO_CANCEL is an edge that will be removed from the last iteration
671 of the unrolled sequence and is expected to make the final loop not
674 If the number of execution of loop is determined by standard induction
675 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
677 if (tree_fits_uhwi_p (niter
))
679 n_unroll
= tree_to_uhwi (niter
);
680 n_unroll_found
= true;
681 edge_to_cancel
= EDGE_SUCC (exit
->src
, 0);
682 if (edge_to_cancel
== exit
)
683 edge_to_cancel
= EDGE_SUCC (exit
->src
, 1);
685 /* We do not know the number of iterations and thus we can not eliminate
690 /* See if we can improve our estimate by using recorded loop bounds. */
692 && (!n_unroll_found
|| (unsigned HOST_WIDE_INT
)maxiter
< n_unroll
))
695 n_unroll_found
= true;
696 /* Loop terminates before the IV variable test, so we can not
697 remove it in the last iteration. */
698 edge_to_cancel
= NULL
;
704 max_unroll
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
);
705 if (n_unroll
> max_unroll
)
709 edge_to_cancel
= loop_edge_to_cancel (loop
);
717 vec
<edge
> to_remove
= vNULL
;
718 if (ul
== UL_SINGLE_ITER
)
721 large
= tree_estimate_loop_size
722 (loop
, exit
, edge_to_cancel
, &size
,
723 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
));
724 ninsns
= size
.overall
;
727 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
728 fprintf (dump_file
, "Not unrolling loop %d: it is too large.\n",
733 unr_insns
= estimated_unrolled_size (&size
, n_unroll
);
734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
736 fprintf (dump_file
, " Loop size: %d\n", (int) ninsns
);
737 fprintf (dump_file
, " Estimated size after unrolling: %d\n",
741 /* If the code is going to shrink, we don't need to be extra cautious
742 on guessing if the unrolling is going to be profitable. */
744 /* If there is IV variable that will become constant, we save
745 one instruction in the loop prologue we do not account
747 <= ninsns
+ (size
.constant_iv
!= false))
749 /* We unroll only inner loops, because we do not consider it profitable
750 otherwise. We still can cancel the loopback edge of a loop that does not roll;
751 this is always a good idea. */
752 else if (ul
== UL_NO_GROWTH
)
754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
755 fprintf (dump_file
, "Not unrolling loop %d: size would grow.\n",
759 /* Outer loops tend to be less interesting candidates for complete
760 unrolling unless we can do a lot of propagation into the inner loop
761 body. For now we disable outer loop unrolling when the code would
763 else if (loop
->inner
)
765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
766 fprintf (dump_file
, "Not unrolling loop %d: "
767 "it is not innermost and code would grow.\n",
771 /* If there is call on a hot path through the loop, then
772 there is most probably not much to optimize. */
773 else if (size
.num_non_pure_calls_on_hot_path
)
775 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
776 fprintf (dump_file
, "Not unrolling loop %d: "
777 "contains call and code would grow.\n",
781 /* If there is pure/const call in the function, then we
782 can still optimize the unrolled loop body if it contains
783 some other interesting code than the calls and code
784 storing or cumulating the return value. */
785 else if (size
.num_pure_calls_on_hot_path
786 /* One IV increment, one test, one ivtmp store
787 and one useful stmt. That is about minimal loop
789 && (size
.non_call_stmts_on_hot_path
790 <= 3 + size
.num_pure_calls_on_hot_path
))
792 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
793 fprintf (dump_file
, "Not unrolling loop %d: "
794 "contains just pure calls and code would grow.\n",
798 /* Complete unrolling is a major win when control flow is removed and
799 one big basic block is created. If the loop contains control flow
800 the optimization may still be a win because of eliminating the loop
801 overhead but it also may blow the branch predictor tables.
802 Limit number of branches on the hot path through the peeled
804 else if (size
.num_branches_on_hot_path
* (int)n_unroll
805 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES
))
807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
808 fprintf (dump_file
, "Not unrolling loop %d: "
809 " number of branches on hot path in the unrolled sequence"
810 " reach --param max-peel-branches limit.\n",
815 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
))
817 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
818 fprintf (dump_file
, "Not unrolling loop %d: "
819 "(--param max-completely-peeled-insns limit reached).\n",
824 initialize_original_copy_tables ();
825 wont_exit
= sbitmap_alloc (n_unroll
+ 1);
826 bitmap_ones (wont_exit
);
827 bitmap_clear_bit (wont_exit
, 0);
829 if (!gimple_duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
832 DLTHE_FLAG_UPDATE_FREQ
833 | DLTHE_FLAG_COMPLETTE_PEEL
))
835 free_original_copy_tables ();
837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
838 fprintf (dump_file
, "Failed to duplicate the loop\n");
842 FOR_EACH_VEC_ELT (to_remove
, i
, e
)
844 bool ok
= remove_path (e
);
848 to_remove
.release ();
850 free_original_copy_tables ();
854 /* Remove the conditional from the last copy of the loop. */
857 cond
= last_stmt (edge_to_cancel
->src
);
858 if (edge_to_cancel
->flags
& EDGE_TRUE_VALUE
)
859 gimple_cond_make_false (cond
);
861 gimple_cond_make_true (cond
);
863 /* Do not remove the path. Doing so may remove outer loop
864 and confuse bookkeeping code in tree_unroll_loops_completely. */
867 /* Store the loop for later unlooping and exit removal. */
868 loops_to_unloop
.safe_push (loop
);
869 loops_to_unloop_nunroll
.safe_push (n_unroll
);
871 if (dump_enabled_p ())
874 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
875 "loop turned into non-loop; it never loops\n");
878 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
879 "loop with %d iterations completely unrolled",
880 (int) (n_unroll
+ 1));
882 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
,
883 " (header execution count %d)",
884 (int)loop
->header
->count
);
885 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, "\n");
889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
892 fprintf (dump_file
, "Exit condition of peeled iterations was "
895 fprintf (dump_file
, "Last iteration exit edge was proved true.\n");
897 fprintf (dump_file
, "Latch of last iteration was marked by "
898 "__builtin_unreachable ().\n");
904 /* Adds a canonical induction variable to LOOP if suitable.
905 CREATE_IV is true if we may create a new iv. UL determines
906 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
907 to determine the number of iterations of a loop by direct evaluation.
908 Returns true if cfg is changed. */
911 canonicalize_loop_induction_variables (struct loop
*loop
,
912 bool create_iv
, enum unroll_level ul
,
917 HOST_WIDE_INT maxiter
;
918 bool modified
= false;
919 location_t locus
= UNKNOWN_LOCATION
;
921 niter
= number_of_latch_executions (loop
);
922 exit
= single_exit (loop
);
923 if (TREE_CODE (niter
) == INTEGER_CST
)
924 locus
= gimple_location (last_stmt (exit
->src
));
927 /* If the loop has more than one exit, try checking all of them
928 for # of iterations determinable through scev. */
930 niter
= find_loop_niter (loop
, &exit
);
932 /* Finally if everything else fails, try brute force evaluation. */
934 && (chrec_contains_undetermined (niter
)
935 || TREE_CODE (niter
) != INTEGER_CST
))
936 niter
= find_loop_niter_by_eval (loop
, &exit
);
939 locus
= gimple_location (last_stmt (exit
->src
));
941 if (TREE_CODE (niter
) != INTEGER_CST
)
945 /* We work exceptionally hard here to estimate the bound
946 by find_loop_niter_by_eval. Be sure to keep it for future. */
947 if (niter
&& TREE_CODE (niter
) == INTEGER_CST
)
949 record_niter_bound (loop
, tree_to_double_int (niter
),
950 exit
== single_likely_exit (loop
), true);
953 /* Force re-computation of loop bounds so we can remove redundant exits. */
954 maxiter
= max_loop_iterations_int (loop
);
956 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
957 && TREE_CODE (niter
) == INTEGER_CST
)
959 fprintf (dump_file
, "Loop %d iterates ", loop
->num
);
960 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
961 fprintf (dump_file
, " times.\n");
963 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
966 fprintf (dump_file
, "Loop %d iterates at most %i times.\n", loop
->num
,
970 /* Remove exits that are known to be never taken based on loop bound.
971 Needs to be called after computation of max_loop_iterations_int that
972 populates the loop bounds. */
973 modified
|= remove_redundant_iv_tests (loop
);
975 if (try_unroll_loop_completely (loop
, exit
, niter
, ul
, maxiter
, locus
))
979 && niter
&& !chrec_contains_undetermined (niter
)
980 && exit
&& just_once_each_iteration_p (loop
, exit
->src
))
981 create_canonical_iv (loop
, exit
, niter
);
986 /* The main entry point of the pass. Adds canonical induction variables
987 to the suitable loops. */
990 canonicalize_induction_variables (void)
993 bool changed
= false;
994 bool irred_invalidated
= false;
995 bitmap loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
997 free_numbers_of_iterations_estimates ();
998 estimate_numbers_of_iterations ();
1000 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
1002 changed
|= canonicalize_loop_induction_variables (loop
,
1003 true, UL_SINGLE_ITER
,
1006 gcc_assert (!need_ssa_update_p (cfun
));
1008 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1009 if (irred_invalidated
1010 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1011 mark_irreducible_loops ();
1013 /* Clean up the information about numbers of iterations, since brute force
1014 evaluation could reveal new information. */
1017 if (!bitmap_empty_p (loop_closed_ssa_invalidated
))
1019 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA
));
1020 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1022 BITMAP_FREE (loop_closed_ssa_invalidated
);
1025 return TODO_cleanup_cfg
;
1029 /* Propagate VAL into all uses of SSA_NAME. */
1032 propagate_into_all_uses (tree ssa_name
, tree val
)
1034 imm_use_iterator iter
;
1037 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, ssa_name
)
1039 gimple_stmt_iterator use_stmt_gsi
= gsi_for_stmt (use_stmt
);
1042 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1045 if (is_gimple_assign (use_stmt
)
1046 && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt
))
1047 == GIMPLE_SINGLE_RHS
)
1049 tree rhs
= gimple_assign_rhs1 (use_stmt
);
1051 if (TREE_CODE (rhs
) == ADDR_EXPR
)
1052 recompute_tree_invariant_for_addr_expr (rhs
);
1055 fold_stmt_inplace (&use_stmt_gsi
);
1056 update_stmt (use_stmt
);
1057 maybe_clean_or_replace_eh_stmt (use_stmt
, use_stmt
);
1061 /* Propagate constant SSA_NAMEs defined in basic block BB. */
1064 propagate_constants_for_unrolling (basic_block bb
)
1066 gimple_stmt_iterator gsi
;
1068 /* Look for degenerate PHI nodes with constant argument. */
1069 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); )
1071 gimple phi
= gsi_stmt (gsi
);
1072 tree result
= gimple_phi_result (phi
);
1073 tree arg
= gimple_phi_arg_def (phi
, 0);
1075 if (gimple_phi_num_args (phi
) == 1 && TREE_CODE (arg
) == INTEGER_CST
)
1077 propagate_into_all_uses (result
, arg
);
1078 gsi_remove (&gsi
, true);
1079 release_ssa_name (result
);
1085 /* Look for assignments to SSA names with constant RHS. */
1086 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
1088 gimple stmt
= gsi_stmt (gsi
);
1091 if (is_gimple_assign (stmt
)
1092 && gimple_assign_rhs_code (stmt
) == INTEGER_CST
1093 && (lhs
= gimple_assign_lhs (stmt
), TREE_CODE (lhs
) == SSA_NAME
)
1094 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1096 propagate_into_all_uses (lhs
, gimple_assign_rhs1 (stmt
));
1097 gsi_remove (&gsi
, true);
1098 release_ssa_name (lhs
);
1105 /* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  MAY_INCREASE_SIZE and UNROLL_OUTER are forwarded
   unchanged to the recursive calls.  FATHER_STACK collects the parent
   loops of loops for which canonicalize_loop_induction_variables
   succeeded; a pushed parent has its AUX field set so that it is pushed
   only once.
   NOTE(review): the final parameter (the loop referred to as LOOP below),
   the declaration of INNER, and the return statements are not visible in
   this extract.  */
1109 tree_unroll_loops_completely_1 (bool may_increase_size
, bool unroll_outer
,
1110 vec
<loop_p
, va_heap
>& father_stack
,
1113 struct loop
*loop_father
;
1114 bool changed
= false;
1116 enum unroll_level ul
;
1118 /* Process inner loops first. */
1119 for (inner
= loop
->inner
; inner
!= NULL
; inner
= inner
->next
)
1120 changed
|= tree_unroll_loops_completely_1 (may_increase_size
,
1121 unroll_outer
, father_stack
,
1124 /* If we changed an inner loop we cannot process outer loops in this
1125 iteration because SSA form is not up-to-date. Continue with
1126 siblings of outer loops instead. */
1130 /* Don't unroll #pragma omp simd loops until the vectorizer
1131 attempts to vectorize those. */
1132 if (loop
->force_vectorize
)
1135 /* Try to unroll this loop. */
1136 loop_father
= loop_outer (loop
);
/* NOTE(review): the statements that assign UL in each branch of this
   condition are not visible in this extract.  */
1140 if (may_increase_size
&& optimize_loop_nest_for_speed_p (loop
)
1141 /* Unroll outermost loops only if asked to do so or they do
1142 not cause code growth. */
1143 && (unroll_outer
|| loop_outer (loop_father
)))
/* Process LOOP at level UL.  The last argument is true exactly when
   flag_tree_loop_ivcanon is zero.  */
1148 if (canonicalize_loop_induction_variables
1149 (loop
, false, ul
, !flag_tree_loop_ivcanon
))
1151 /* If we'll continue unrolling, we need to propagate constants
1152 within the new basic blocks to fold away induction variable
1153 computations; otherwise, the size might blow up before the
1154 iteration is complete and the IR eventually cleaned up. */
/* Queue the parent only if it has an outer loop itself and is not already
   queued; AUX serves as the "already queued" marker.  */
1155 if (loop_outer (loop_father
) && !loop_father
->aux
)
1157 father_stack
.safe_push (loop_father
);
1158 loop_father
->aux
= loop_father
;
1167 /* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  UNROLL_OUTER is passed through to
   tree_unroll_loops_completely_1.
   NOTE(review): several declarations, the head of the retry loop, and the
   return statements of this function are not visible in this extract.  */
1172 tree_unroll_loops_completely (bool may_increase_size
, bool unroll_outer
)
/* Parent loops queued by tree_unroll_loops_completely_1 for constant
   propagation.  */
1174 auto_vec
<loop_p
, 16> father_stack
;
1177 bool irred_invalidated
= false;
1182 bitmap loop_closed_ssa_invalidated
= NULL
;
/* The invalidation bitmap is allocated only when the loops are in
   LOOP_CLOSED_SSA state; otherwise it stays NULL.  */
1184 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1185 loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
/* Discard the iteration-count estimates and compute them again before
   trying to unroll.  */
1187 free_numbers_of_iterations_estimates ();
1188 estimate_numbers_of_iterations ();
/* Start the walk at the root of the loop tree.  */
1190 changed
= tree_unroll_loops_completely_1 (may_increase_size
,
1191 unroll_outer
, father_stack
,
1192 current_loops
->tree_root
);
1198 /* Be sure to skip unlooped loops while processing father_stack
1200 FOR_EACH_VEC_ELT (loops_to_unloop
, i
, iter
)
1201 (*iter
)->aux
= NULL
;
1202 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
1205 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1207 /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
1208 if (loop_closed_ssa_invalidated
1209 && !bitmap_empty_p (loop_closed_ssa_invalidated
))
1210 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated
,
1213 update_ssa (TODO_update_ssa
);
1215 /* Propagate the constants within the new basic blocks. */
1216 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
/* Basic blocks of the queued parent loop, in dominator order.
   NOTE(review): the release of BODY is not visible in this extract.  */
1220 basic_block
*body
= get_loop_body_in_dom_order (*iter
);
1221 for (j
= 0; j
< (*iter
)->num_nodes
; j
++)
1222 propagate_constants_for_unrolling (body
[j
]);
/* Clear the "already queued" marker set by
   tree_unroll_loops_completely_1.  */
1224 (*iter
)->aux
= NULL
;
/* Empty the stack for the next round.  */
1226 father_stack
.truncate (0);
1228 /* This will take care of removing completely unrolled loops
1229 from the loop structures so we can continue unrolling now
1231 if (cleanup_tree_cfg ())
1232 update_ssa (TODO_update_ssa_only_virtuals
);
1234 /* Clean up the information about numbers of iterations, since
1235 complete unrolling might have invalidated it. */
1237 #ifdef ENABLE_CHECKING
1238 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1239 verify_loop_closed_ssa (true);
1242 if (loop_closed_ssa_invalidated
)
1243 BITMAP_FREE (loop_closed_ssa_invalidated
);
/* NOTE(review): the first operand of this loop condition is not visible
   in this extract; the visible part limits the number of rounds to
   PARAM_MAX_UNROLL_ITERATIONS.  */
1246 && ++iteration
<= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS
));
1248 father_stack
.release ();
/* If unlooping reported that irreducible-region information was
   invalidated and the loop state says such marks are maintained,
   recompute them.  */
1250 if (irred_invalidated
1251 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1252 mark_irreducible_loops ();
1257 /* Canonical induction variable creation pass. */
/* Static description of the "ivcanon" GIMPLE pass: it requires the CFG
   and SSA properties, provides and destroys none, and requests no TODO
   actions at start or finish.  */
1261 const pass_data pass_data_iv_canon
=
1263 GIMPLE_PASS
, /* type */
1264 "ivcanon", /* name */
1265 OPTGROUP_LOOP
, /* optinfo_flags */
1266 true, /* has_execute */
1267 TV_TREE_LOOP_IVCANON
, /* tv_id */
1268 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1269 0, /* properties_provided */
1270 0, /* properties_destroyed */
1271 0, /* todo_flags_start */
1272 0, /* todo_flags_finish */
/* Pass object for "ivcanon", constructed from pass_data_iv_canon.  The
   gate enables the pass only when flag_tree_loop_ivcanon is nonzero.  */
1275 class pass_iv_canon
: public gimple_opt_pass
1278 pass_iv_canon (gcc::context
*ctxt
)
1279 : gimple_opt_pass (pass_data_iv_canon
, ctxt
)
1282 /* opt_pass methods: */
1283 virtual bool gate (function
*) { return flag_tree_loop_ivcanon
!= 0; }
1284 virtual unsigned int execute (function
*fun
);
1286 }; // class pass_iv_canon
/* Run the pass on FUN and return the result of
   canonicalize_induction_variables.
   NOTE(review): the statement taken when number_of_loops (FUN) <= 1 is
   not visible in this extract; presumably an early return for functions
   without real loops -- confirm.  */
1289 pass_iv_canon::execute (function
*fun
)
1291 if (number_of_loops (fun
) <= 1)
1294 return canonicalize_induction_variables ();
/* Factory: return a newly allocated pass_iv_canon constructed with
   CTXT.  */
1300 make_pass_iv_canon (gcc::context
*ctxt
)
1302 return new pass_iv_canon (ctxt
);
1305 /* Complete unrolling of loops. */
/* Static description of the "cunroll" GIMPLE pass: it requires the CFG
   and SSA properties, provides and destroys none, and requests no TODO
   actions at start or finish.  */
1309 const pass_data pass_data_complete_unroll
=
1311 GIMPLE_PASS
, /* type */
1312 "cunroll", /* name */
1313 OPTGROUP_LOOP
, /* optinfo_flags */
1314 true, /* has_execute */
1315 TV_COMPLETE_UNROLL
, /* tv_id */
1316 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1317 0, /* properties_provided */
1318 0, /* properties_destroyed */
1319 0, /* todo_flags_start */
1320 0, /* todo_flags_finish */
/* Pass object for "cunroll", constructed from pass_data_complete_unroll.
   No gate is declared in the visible part of the class.  */
1323 class pass_complete_unroll
: public gimple_opt_pass
1326 pass_complete_unroll (gcc::context
*ctxt
)
1327 : gimple_opt_pass (pass_data_complete_unroll
, ctxt
)
1330 /* opt_pass methods: */
1331 virtual unsigned int execute (function
*);
1333 }; // class pass_complete_unroll
/* Run complete unrolling on FUN with UNROLL_OUTER set to true.  Code
   growth is permitted when flag_unroll_loops is set or optimize >= 3.
   NOTE(review): one line of the first argument expression and the
   statement taken when number_of_loops (FUN) <= 1 are not visible in this
   extract, so further conditions may apply -- confirm against the full
   source.  */
1336 pass_complete_unroll::execute (function
*fun
)
1338 if (number_of_loops (fun
) <= 1)
1341 return tree_unroll_loops_completely (flag_unroll_loops
1343 || optimize
>= 3, true);
/* Factory: return a newly allocated pass_complete_unroll constructed with
   CTXT.  */
1349 make_pass_complete_unroll (gcc::context
*ctxt
)
1351 return new pass_complete_unroll (ctxt
);
1354 /* Complete unrolling of inner loops. */
/* Static description of the "cunrolli" GIMPLE pass.  It shares
   TV_COMPLETE_UNROLL with "cunroll", requires the CFG and SSA properties,
   and requests TODO_verify_flow when it finishes.  */
1358 const pass_data pass_data_complete_unrolli
=
1360 GIMPLE_PASS
, /* type */
1361 "cunrolli", /* name */
1362 OPTGROUP_LOOP
, /* optinfo_flags */
1363 true, /* has_execute */
1364 TV_COMPLETE_UNROLL
, /* tv_id */
1365 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1366 0, /* properties_provided */
1367 0, /* properties_destroyed */
1368 0, /* todo_flags_start */
1369 TODO_verify_flow
, /* todo_flags_finish */
/* Pass object for "cunrolli", constructed from
   pass_data_complete_unrolli.  The gate enables the pass only when
   optimize >= 2.  */
1372 class pass_complete_unrolli
: public gimple_opt_pass
1375 pass_complete_unrolli (gcc::context
*ctxt
)
1376 : gimple_opt_pass (pass_data_complete_unrolli
, ctxt
)
1379 /* opt_pass methods: */
1380 virtual bool gate (function
*) { return optimize
>= 2; }
1381 virtual unsigned int execute (function
*);
1383 }; // class pass_complete_unrolli
/* Run complete unrolling on FUN with UNROLL_OUTER set to false.  The loop
   optimizer is initialized here with LOOPS_NORMAL and
   LOOPS_HAVE_RECORDED_EXITS and finalized before returning.  Unrolling is
   attempted only when number_of_loops (FUN) > 1, and code growth is
   permitted only when optimize >= 3.
   NOTE(review): the declaration of RET, the return statement, and some
   statements around the unrolling call are not visible in this
   extract.  */
1386 pass_complete_unrolli::execute (function
*fun
)
1390 loop_optimizer_init (LOOPS_NORMAL
1391 | LOOPS_HAVE_RECORDED_EXITS
);
1392 if (number_of_loops (fun
) > 1)
1395 ret
= tree_unroll_loops_completely (optimize
>= 3, false);
/* Drop the iteration-count estimates once unrolling has run.  */
1396 free_numbers_of_iterations_estimates ();
1399 loop_optimizer_finalize ();
/* Factory: return a newly allocated pass_complete_unrolli constructed
   with CTXT.  */
1407 make_pass_complete_unrolli (gcc::context
*ctxt
)
1409 return new pass_complete_unrolli (ctxt
);