1 /* Induction variable canonicalization and loop peeling.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass detects the loops that iterate a constant number of times,
21 adds a canonical induction variable (step -1, tested against 0)
22 and replaces the exit test. This enables the less powerful rtl
23 level analysis to use this information.
25 This might spoil the code in some cases (by increasing register pressure).
26 Note that in the case the new variable is not needed, ivopts will get rid
27 of it, so it might only be a problem when there are no other linear induction
28 variables. In that case the created optimization possibilities are likely
31 Additionally in case we detect that it is beneficial to unroll the
32 loop completely, we do it right here to expose the optimization
33 possibilities to the following passes. */
37 #include "coretypes.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-ssa-alias.h"
44 #include "internal-fn.h"
45 #include "gimple-fold.h"
47 #include "gimple-expr.h"
50 #include "gimple-iterator.h"
51 #include "gimple-ssa.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop-niter.h"
60 #include "tree-ssa-loop.h"
61 #include "tree-into-ssa.h"
63 #include "tree-pass.h"
64 #include "tree-chrec.h"
65 #include "tree-scalar-evolution.h"
68 #include "tree-inline.h"
70 #include "tree-cfgcleanup.h"
73 /* Specifies types of loops that may be unrolled. */
77 UL_SINGLE_ITER
, /* Only loops that exit immediately in the first
79 UL_NO_GROWTH
, /* Only loops whose unrolling will not cause increase
81 UL_ALL
/* All suitable loops. */
84 /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
85 is the exit edge whose condition is replaced. */
88 create_canonical_iv (struct loop
*loop
, edge exit
, tree niter
)
93 gimple_stmt_iterator incr_at
;
96 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
98 fprintf (dump_file
, "Added canonical iv to loop %d, ", loop
->num
);
99 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
100 fprintf (dump_file
, " iterations.\n");
103 cond
= last_stmt (exit
->src
);
104 in
= EDGE_SUCC (exit
->src
, 0);
106 in
= EDGE_SUCC (exit
->src
, 1);
108 /* Note that we do not need to worry about overflows, since
109 type of niter is always unsigned and all comparisons are
110 just for equality/nonequality -- i.e. everything works
111 with a modulo arithmetics. */
113 type
= TREE_TYPE (niter
);
114 niter
= fold_build2 (PLUS_EXPR
, type
,
116 build_int_cst (type
, 1));
117 incr_at
= gsi_last_bb (in
->src
);
119 build_int_cst (type
, -1),
121 &incr_at
, false, NULL
, &var
);
123 cmp
= (exit
->flags
& EDGE_TRUE_VALUE
) ? EQ_EXPR
: NE_EXPR
;
124 gimple_cond_set_code (cond
, cmp
);
125 gimple_cond_set_lhs (cond
, var
);
126 gimple_cond_set_rhs (cond
, build_int_cst (type
, 0));
130 /* Describe size of loop as detected by tree_estimate_loop_size. */
133 /* Number of instructions in the loop. */
136 /* Number of instructions that will be likely optimized out in
137 peeled iterations of loop (i.e. computation based on induction
138 variable where induction variable starts at known constant.) */
139 int eliminated_by_peeling
;
141 /* Same statistics for last iteration of loop: it is smaller because
142 instructions after exit are not executed. */
144 int last_iteration_eliminated_by_peeling
;
146 /* If some IV computation will become constant. */
149 /* Number of call stmts that are not a builtin and are pure or const
150 present on the hot path. */
151 int num_pure_calls_on_hot_path
;
152 /* Number of call stmts that are not a builtin and are not pure nor const
153 present on the hot path. */
154 int num_non_pure_calls_on_hot_path
;
155 /* Number of statements other than calls in the loop. */
156 int non_call_stmts_on_hot_path
;
157 /* Number of branches seen on the hot path. */
158 int num_branches_on_hot_path
;
161 /* Return true if OP in STMT will be constant after peeling LOOP. */
164 constant_after_peeling (tree op
, gimple stmt
, struct loop
*loop
)
168 if (is_gimple_min_invariant (op
))
171 /* We can still fold accesses to constant arrays when index is known. */
172 if (TREE_CODE (op
) != SSA_NAME
)
176 /* First make fast look if we see constant array inside. */
177 while (handled_component_p (base
))
178 base
= TREE_OPERAND (base
, 0);
180 && ctor_for_folding (base
) != error_mark_node
)
181 || CONSTANT_CLASS_P (base
))
183 /* If so, see if we understand all the indices. */
185 while (handled_component_p (base
))
187 if (TREE_CODE (base
) == ARRAY_REF
188 && !constant_after_peeling (TREE_OPERAND (base
, 1), stmt
, loop
))
190 base
= TREE_OPERAND (base
, 0);
197 /* Induction variables are constants. */
198 if (!simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false))
200 if (!is_gimple_min_invariant (iv
.base
))
202 if (!is_gimple_min_invariant (iv
.step
))
207 /* Computes an estimated number of insns in LOOP.
208 EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
209 iteration of the loop.
210 EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
212 Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
213 Stop estimating after UPPER_BOUND is met. Return true in this case. */
216 tree_estimate_loop_size (struct loop
*loop
, edge exit
, edge edge_to_cancel
, struct loop_size
*size
,
219 basic_block
*body
= get_loop_body (loop
);
220 gimple_stmt_iterator gsi
;
223 vec
<basic_block
> path
= get_loop_hot_path (loop
);
226 size
->eliminated_by_peeling
= 0;
227 size
->last_iteration
= 0;
228 size
->last_iteration_eliminated_by_peeling
= 0;
229 size
->num_pure_calls_on_hot_path
= 0;
230 size
->num_non_pure_calls_on_hot_path
= 0;
231 size
->non_call_stmts_on_hot_path
= 0;
232 size
->num_branches_on_hot_path
= 0;
233 size
->constant_iv
= 0;
235 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
236 fprintf (dump_file
, "Estimating sizes for loop %i\n", loop
->num
);
237 for (i
= 0; i
< loop
->num_nodes
; i
++)
239 if (edge_to_cancel
&& body
[i
] != edge_to_cancel
->src
240 && dominated_by_p (CDI_DOMINATORS
, body
[i
], edge_to_cancel
->src
))
244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
245 fprintf (dump_file
, " BB: %i, after_exit: %i\n", body
[i
]->index
, after_exit
);
247 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
249 gimple stmt
= gsi_stmt (gsi
);
250 int num
= estimate_num_insns (stmt
, &eni_size_weights
);
251 bool likely_eliminated
= false;
252 bool likely_eliminated_last
= false;
253 bool likely_eliminated_peeled
= false;
255 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
257 fprintf (dump_file
, " size: %3i ", num
);
258 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, 0);
261 /* Look for reasons why we might optimize this stmt away. */
263 if (gimple_has_side_effects (stmt
))
265 /* Exit conditional. */
266 else if (exit
&& body
[i
] == exit
->src
267 && stmt
== last_stmt (exit
->src
))
269 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
270 fprintf (dump_file
, " Exit condition will be eliminated "
271 "in peeled copies.\n");
272 likely_eliminated_peeled
= true;
274 else if (edge_to_cancel
&& body
[i
] == edge_to_cancel
->src
275 && stmt
== last_stmt (edge_to_cancel
->src
))
277 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
278 fprintf (dump_file
, " Exit condition will be eliminated "
280 likely_eliminated_last
= true;
282 /* Sets of IV variables */
283 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
284 && constant_after_peeling (gimple_assign_lhs (stmt
), stmt
, loop
))
286 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
287 fprintf (dump_file
, " Induction variable computation will"
288 " be folded away.\n");
289 likely_eliminated
= true;
291 /* Assignments of IV variables. */
292 else if (gimple_code (stmt
) == GIMPLE_ASSIGN
293 && TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
294 && constant_after_peeling (gimple_assign_rhs1 (stmt
), stmt
, loop
)
295 && (gimple_assign_rhs_class (stmt
) != GIMPLE_BINARY_RHS
296 || constant_after_peeling (gimple_assign_rhs2 (stmt
),
299 size
->constant_iv
= true;
300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
301 fprintf (dump_file
, " Constant expression will be folded away.\n");
302 likely_eliminated
= true;
305 else if ((gimple_code (stmt
) == GIMPLE_COND
306 && constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
307 && constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
))
308 || (gimple_code (stmt
) == GIMPLE_SWITCH
309 && constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
312 fprintf (dump_file
, " Constant conditional.\n");
313 likely_eliminated
= true;
316 size
->overall
+= num
;
317 if (likely_eliminated
|| likely_eliminated_peeled
)
318 size
->eliminated_by_peeling
+= num
;
321 size
->last_iteration
+= num
;
322 if (likely_eliminated
|| likely_eliminated_last
)
323 size
->last_iteration_eliminated_by_peeling
+= num
;
325 if ((size
->overall
* 3 / 2 - size
->eliminated_by_peeling
326 - size
->last_iteration_eliminated_by_peeling
) > upper_bound
)
334 while (path
.length ())
336 basic_block bb
= path
.pop ();
337 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
339 gimple stmt
= gsi_stmt (gsi
);
340 if (gimple_code (stmt
) == GIMPLE_CALL
)
342 int flags
= gimple_call_flags (stmt
);
343 tree decl
= gimple_call_fndecl (stmt
);
345 if (decl
&& DECL_IS_BUILTIN (decl
)
346 && is_inexpensive_builtin (decl
))
348 else if (flags
& (ECF_PURE
| ECF_CONST
))
349 size
->num_pure_calls_on_hot_path
++;
351 size
->num_non_pure_calls_on_hot_path
++;
352 size
->num_branches_on_hot_path
++;
354 else if (gimple_code (stmt
) != GIMPLE_CALL
355 && gimple_code (stmt
) != GIMPLE_DEBUG
)
356 size
->non_call_stmts_on_hot_path
++;
357 if (((gimple_code (stmt
) == GIMPLE_COND
358 && (!constant_after_peeling (gimple_cond_lhs (stmt
), stmt
, loop
)
359 || constant_after_peeling (gimple_cond_rhs (stmt
), stmt
, loop
)))
360 || (gimple_code (stmt
) == GIMPLE_SWITCH
361 && !constant_after_peeling (gimple_switch_index (stmt
), stmt
, loop
)))
362 && (!exit
|| bb
!= exit
->src
))
363 size
->num_branches_on_hot_path
++;
367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
368 fprintf (dump_file
, "size: %i-%i, last_iteration: %i-%i\n", size
->overall
,
369 size
->eliminated_by_peeling
, size
->last_iteration
,
370 size
->last_iteration_eliminated_by_peeling
);
376 /* Estimate number of insns of completely unrolled loop.
377 It is (NUNROLL + 1) * size of loop body with taking into account
378 the fact that in last copy everything after exit conditional
379 is dead and that some instructions will be eliminated after
382 Loop body is likely going to simplify further, this is difficult
383 to guess, we just decrease the result by 1/3. */
385 static unsigned HOST_WIDE_INT
386 estimated_unrolled_size (struct loop_size
*size
,
387 unsigned HOST_WIDE_INT nunroll
)
389 HOST_WIDE_INT unr_insns
= ((nunroll
)
390 * (HOST_WIDE_INT
) (size
->overall
391 - size
->eliminated_by_peeling
));
394 unr_insns
+= size
->last_iteration
- size
->last_iteration_eliminated_by_peeling
;
396 unr_insns
= unr_insns
* 2 / 3;
403 /* Loop LOOP is known to not loop. See if there is an edge in the loop
404 body that can be remove to make the loop to always exit and at
405 the same time it does not make any code potentially executed
406 during the last iteration dead.
408 After complette unrolling we still may get rid of the conditional
409 on the exit in the last copy even if we have no idea what it does.
410 This is quite common case for loops of form
416 Here we prove the loop to iterate 5 times but we do not know
417 it from induction variable.
419 For now we handle only simple case where there is exit condition
420 just before the latch block and the latch block contains no statements
421 with side effect that may otherwise terminate the execution of loop
422 (such as by EH or by terminating the program or longjmp).
424 In the general case we may want to cancel the paths leading to statements
425 loop-niter identified as having undefined effect in the last iteration.
426 The other cases are hopefully rare and will be cleaned up later. */
429 loop_edge_to_cancel (struct loop
*loop
)
434 gimple_stmt_iterator gsi
;
436 /* We want only one predecestor of the loop. */
437 if (EDGE_COUNT (loop
->latch
->preds
) > 1)
440 exits
= get_loop_exit_edges (loop
);
442 FOR_EACH_VEC_ELT (exits
, i
, edge_to_cancel
)
444 /* Find the other edge than the loop exit
445 leaving the conditoinal. */
446 if (EDGE_COUNT (edge_to_cancel
->src
->succs
) != 2)
448 if (EDGE_SUCC (edge_to_cancel
->src
, 0) == edge_to_cancel
)
449 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 1);
451 edge_to_cancel
= EDGE_SUCC (edge_to_cancel
->src
, 0);
453 /* We only can handle conditionals. */
454 if (!(edge_to_cancel
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
457 /* We should never have conditionals in the loop latch. */
458 gcc_assert (edge_to_cancel
->dest
!= loop
->header
);
460 /* Check that it leads to loop latch. */
461 if (edge_to_cancel
->dest
!= loop
->latch
)
466 /* Verify that the code in loop latch does nothing that may end program
467 execution without really reaching the exit. This may include
468 non-pure/const function calls, EH statements, volatile ASMs etc. */
469 for (gsi
= gsi_start_bb (loop
->latch
); !gsi_end_p (gsi
); gsi_next (&gsi
))
470 if (gimple_has_side_effects (gsi_stmt (gsi
)))
472 return edge_to_cancel
;
478 /* Remove all tests for exits that are known to be taken after LOOP was
479 peeled NPEELED times. Put gcc_unreachable before every statement
480 known to not be executed. */
483 remove_exits_and_undefined_stmts (struct loop
*loop
, unsigned int npeeled
)
485 struct nb_iter_bound
*elt
;
486 bool changed
= false;
488 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
490 /* If statement is known to be undefined after peeling, turn it
491 into unreachable (or trap when debugging experience is supposed
494 && wi::ltu_p (elt
->bound
, npeeled
))
496 gimple_stmt_iterator gsi
= gsi_for_stmt (elt
->stmt
);
497 gimple stmt
= gimple_build_call
498 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
500 gimple_set_location (stmt
, gimple_location (elt
->stmt
));
501 gsi_insert_before (&gsi
, stmt
, GSI_NEW_STMT
);
503 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
505 fprintf (dump_file
, "Forced statement unreachable: ");
506 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
509 /* If we know the exit will be taken after peeling, update. */
510 else if (elt
->is_exit
511 && wi::leu_p (elt
->bound
, npeeled
))
513 basic_block bb
= gimple_bb (elt
->stmt
);
514 edge exit_edge
= EDGE_SUCC (bb
, 0);
516 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
518 fprintf (dump_file
, "Forced exit to be taken: ");
519 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
521 if (!loop_exit_edge_p (loop
, exit_edge
))
522 exit_edge
= EDGE_SUCC (bb
, 1);
523 gcc_checking_assert (loop_exit_edge_p (loop
, exit_edge
));
524 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
525 gimple_cond_make_true (elt
->stmt
);
527 gimple_cond_make_false (elt
->stmt
);
528 update_stmt (elt
->stmt
);
535 /* Remove all exits that are known to be never taken because of the loop bound
539 remove_redundant_iv_tests (struct loop
*loop
)
541 struct nb_iter_bound
*elt
;
542 bool changed
= false;
544 if (!loop
->any_upper_bound
)
546 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
548 /* Exit is pointless if it won't be taken before loop reaches
550 if (elt
->is_exit
&& loop
->any_upper_bound
551 && wi::ltu_p (loop
->nb_iterations_upper_bound
, elt
->bound
))
553 basic_block bb
= gimple_bb (elt
->stmt
);
554 edge exit_edge
= EDGE_SUCC (bb
, 0);
555 struct tree_niter_desc niter
;
557 if (!loop_exit_edge_p (loop
, exit_edge
))
558 exit_edge
= EDGE_SUCC (bb
, 1);
560 /* Only when we know the actual number of iterations, not
561 just a bound, we can remove the exit. */
562 if (!number_of_iterations_exit (loop
, exit_edge
,
563 &niter
, false, false)
564 || !integer_onep (niter
.assumptions
)
565 || !integer_zerop (niter
.may_be_zero
)
567 || TREE_CODE (niter
.niter
) != INTEGER_CST
568 || !wi::ltu_p (loop
->nb_iterations_upper_bound
,
569 wi::to_widest (niter
.niter
)))
572 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
574 fprintf (dump_file
, "Removed pointless exit: ");
575 print_gimple_stmt (dump_file
, elt
->stmt
, 0, 0);
577 if (exit_edge
->flags
& EDGE_TRUE_VALUE
)
578 gimple_cond_make_false (elt
->stmt
);
580 gimple_cond_make_true (elt
->stmt
);
581 update_stmt (elt
->stmt
);
588 /* Stores loops that will be unlooped after we process whole loop tree. */
589 static vec
<loop_p
> loops_to_unloop
;
590 static vec
<int> loops_to_unloop_nunroll
;
592 /* Cancel all fully unrolled loops by putting __builtin_unreachable
594 We do it after all unrolling since unlooping moves basic blocks
595 across loop boundaries trashing loop closed SSA form as well
596 as SCEV info needed to be intact during unrolling.
598 IRRED_INVALIDATED is used to bookkeep if information about
599 irreducible regions may become invalid as a result
600 of the transformation.
601 LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
602 when we need to go into loop closed SSA form. */
605 unloop_loops (bitmap loop_closed_ssa_invalidated
,
606 bool *irred_invalidated
)
608 while (loops_to_unloop
.length ())
610 struct loop
*loop
= loops_to_unloop
.pop ();
611 int n_unroll
= loops_to_unloop_nunroll
.pop ();
612 basic_block latch
= loop
->latch
;
613 edge latch_edge
= loop_latch_edge (loop
);
614 int flags
= latch_edge
->flags
;
615 location_t locus
= latch_edge
->goto_locus
;
617 gimple_stmt_iterator gsi
;
619 remove_exits_and_undefined_stmts (loop
, n_unroll
);
621 /* Unloop destroys the latch edge. */
622 unloop (loop
, irred_invalidated
, loop_closed_ssa_invalidated
);
624 /* Create new basic block for the latch edge destination and wire
626 stmt
= gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE
), 0);
627 latch_edge
= make_edge (latch
, create_basic_block (NULL
, NULL
, latch
), flags
);
628 latch_edge
->probability
= 0;
629 latch_edge
->count
= 0;
630 latch_edge
->flags
|= flags
;
631 latch_edge
->goto_locus
= locus
;
633 latch_edge
->dest
->loop_father
= current_loops
->tree_root
;
634 latch_edge
->dest
->count
= 0;
635 latch_edge
->dest
->frequency
= 0;
636 set_immediate_dominator (CDI_DOMINATORS
, latch_edge
->dest
, latch_edge
->src
);
638 gsi
= gsi_start_bb (latch_edge
->dest
);
639 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
641 loops_to_unloop
.release ();
642 loops_to_unloop_nunroll
.release ();
645 /* Tries to unroll LOOP completely, i.e. NITER times.
646 UL determines which loops we are allowed to unroll.
647 EXIT is the exit of the loop that should be eliminated.
648 MAXITER specfy bound on number of iterations, -1 if it is
649 not known or too large for HOST_WIDE_INT. The location
650 LOCUS corresponding to the loop is used when emitting
651 a summary of the unroll to the dump file. */
654 try_unroll_loop_completely (struct loop
*loop
,
655 edge exit
, tree niter
,
656 enum unroll_level ul
,
657 HOST_WIDE_INT maxiter
,
660 unsigned HOST_WIDE_INT n_unroll
, ninsns
, max_unroll
, unr_insns
;
662 struct loop_size size
;
663 bool n_unroll_found
= false;
664 edge edge_to_cancel
= NULL
;
666 /* See if we proved number of iterations to be low constant.
668 EXIT is an edge that will be removed in all but last iteration of
671 EDGE_TO_CACNEL is an edge that will be removed from the last iteration
672 of the unrolled sequence and is expected to make the final loop not
675 If the number of execution of loop is determined by standard induction
676 variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
678 if (tree_fits_uhwi_p (niter
))
680 n_unroll
= tree_to_uhwi (niter
);
681 n_unroll_found
= true;
682 edge_to_cancel
= EDGE_SUCC (exit
->src
, 0);
683 if (edge_to_cancel
== exit
)
684 edge_to_cancel
= EDGE_SUCC (exit
->src
, 1);
686 /* We do not know the number of iterations and thus we can not eliminate
691 /* See if we can improve our estimate by using recorded loop bounds. */
693 && (!n_unroll_found
|| (unsigned HOST_WIDE_INT
)maxiter
< n_unroll
))
696 n_unroll_found
= true;
697 /* Loop terminates before the IV variable test, so we can not
698 remove it in the last iteration. */
699 edge_to_cancel
= NULL
;
705 max_unroll
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
);
706 if (n_unroll
> max_unroll
)
710 edge_to_cancel
= loop_edge_to_cancel (loop
);
718 vec
<edge
> to_remove
= vNULL
;
719 if (ul
== UL_SINGLE_ITER
)
722 large
= tree_estimate_loop_size
723 (loop
, exit
, edge_to_cancel
, &size
,
724 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
));
725 ninsns
= size
.overall
;
728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
729 fprintf (dump_file
, "Not unrolling loop %d: it is too large.\n",
734 unr_insns
= estimated_unrolled_size (&size
, n_unroll
);
735 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
737 fprintf (dump_file
, " Loop size: %d\n", (int) ninsns
);
738 fprintf (dump_file
, " Estimated size after unrolling: %d\n",
742 /* If the code is going to shrink, we don't need to be extra cautious
743 on guessing if the unrolling is going to be profitable. */
745 /* If there is IV variable that will become constant, we save
746 one instruction in the loop prologue we do not account
748 <= ninsns
+ (size
.constant_iv
!= false))
750 /* We unroll only inner loops, because we do not consider it profitable
751 otheriwse. We still can cancel loopback edge of not rolling loop;
752 this is always a good idea. */
753 else if (ul
== UL_NO_GROWTH
)
755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
756 fprintf (dump_file
, "Not unrolling loop %d: size would grow.\n",
760 /* Outer loops tend to be less interesting candidates for complette
761 unrolling unless we can do a lot of propagation into the inner loop
762 body. For now we disable outer loop unrolling when the code would
764 else if (loop
->inner
)
766 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
767 fprintf (dump_file
, "Not unrolling loop %d: "
768 "it is not innermost and code would grow.\n",
772 /* If there is call on a hot path through the loop, then
773 there is most probably not much to optimize. */
774 else if (size
.num_non_pure_calls_on_hot_path
)
776 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
777 fprintf (dump_file
, "Not unrolling loop %d: "
778 "contains call and code would grow.\n",
782 /* If there is pure/const call in the function, then we
783 can still optimize the unrolled loop body if it contains
784 some other interesting code than the calls and code
785 storing or cumulating the return value. */
786 else if (size
.num_pure_calls_on_hot_path
787 /* One IV increment, one test, one ivtmp store
788 and one useful stmt. That is about minimal loop
790 && (size
.non_call_stmts_on_hot_path
791 <= 3 + size
.num_pure_calls_on_hot_path
))
793 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
794 fprintf (dump_file
, "Not unrolling loop %d: "
795 "contains just pure calls and code would grow.\n",
799 /* Complette unrolling is major win when control flow is removed and
800 one big basic block is created. If the loop contains control flow
801 the optimization may still be a win because of eliminating the loop
802 overhead but it also may blow the branch predictor tables.
803 Limit number of branches on the hot path through the peeled
805 else if (size
.num_branches_on_hot_path
* (int)n_unroll
806 > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES
))
808 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
809 fprintf (dump_file
, "Not unrolling loop %d: "
810 " number of branches on hot path in the unrolled sequence"
811 " reach --param max-peel-branches limit.\n",
816 > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
))
818 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
819 fprintf (dump_file
, "Not unrolling loop %d: "
820 "(--param max-completely-peeled-insns limit reached).\n",
825 initialize_original_copy_tables ();
826 wont_exit
= sbitmap_alloc (n_unroll
+ 1);
827 bitmap_ones (wont_exit
);
828 bitmap_clear_bit (wont_exit
, 0);
830 if (!gimple_duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
833 DLTHE_FLAG_UPDATE_FREQ
834 | DLTHE_FLAG_COMPLETTE_PEEL
))
836 free_original_copy_tables ();
838 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
839 fprintf (dump_file
, "Failed to duplicate the loop\n");
843 FOR_EACH_VEC_ELT (to_remove
, i
, e
)
845 bool ok
= remove_path (e
);
849 to_remove
.release ();
851 free_original_copy_tables ();
855 /* Remove the conditional from the last copy of the loop. */
858 cond
= last_stmt (edge_to_cancel
->src
);
859 if (edge_to_cancel
->flags
& EDGE_TRUE_VALUE
)
860 gimple_cond_make_false (cond
);
862 gimple_cond_make_true (cond
);
864 /* Do not remove the path. Doing so may remove outer loop
865 and confuse bookkeeping code in tree_unroll_loops_completelly. */
868 /* Store the loop for later unlooping and exit removal. */
869 loops_to_unloop
.safe_push (loop
);
870 loops_to_unloop_nunroll
.safe_push (n_unroll
);
872 if (dump_enabled_p ())
875 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
876 "loop turned into non-loop; it never loops\n");
879 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, locus
,
880 "loop with %d iterations completely unrolled",
881 (int) (n_unroll
+ 1));
883 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
,
884 " (header execution count %d)",
885 (int)loop
->header
->count
);
886 dump_printf (MSG_OPTIMIZED_LOCATIONS
| TDF_DETAILS
, "\n");
890 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
893 fprintf (dump_file
, "Exit condition of peeled iterations was "
896 fprintf (dump_file
, "Last iteration exit edge was proved true.\n");
898 fprintf (dump_file
, "Latch of last iteration was marked by "
899 "__builtin_unreachable ().\n");
905 /* Adds a canonical induction variable to LOOP if suitable.
906 CREATE_IV is true if we may create a new iv. UL determines
907 which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
908 to determine the number of iterations of a loop by direct evaluation.
909 Returns true if cfg is changed. */
912 canonicalize_loop_induction_variables (struct loop
*loop
,
913 bool create_iv
, enum unroll_level ul
,
918 HOST_WIDE_INT maxiter
;
919 bool modified
= false;
920 location_t locus
= UNKNOWN_LOCATION
;
922 niter
= number_of_latch_executions (loop
);
923 exit
= single_exit (loop
);
924 if (TREE_CODE (niter
) == INTEGER_CST
)
925 locus
= gimple_location (last_stmt (exit
->src
));
928 /* If the loop has more than one exit, try checking all of them
929 for # of iterations determinable through scev. */
931 niter
= find_loop_niter (loop
, &exit
);
933 /* Finally if everything else fails, try brute force evaluation. */
935 && (chrec_contains_undetermined (niter
)
936 || TREE_CODE (niter
) != INTEGER_CST
))
937 niter
= find_loop_niter_by_eval (loop
, &exit
);
940 locus
= gimple_location (last_stmt (exit
->src
));
942 if (TREE_CODE (niter
) != INTEGER_CST
)
946 /* We work exceptionally hard here to estimate the bound
947 by find_loop_niter_by_eval. Be sure to keep it for future. */
948 if (niter
&& TREE_CODE (niter
) == INTEGER_CST
)
950 record_niter_bound (loop
, wi::to_widest (niter
),
951 exit
== single_likely_exit (loop
), true);
954 /* Force re-computation of loop bounds so we can remove redundant exits. */
955 maxiter
= max_loop_iterations_int (loop
);
957 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
958 && TREE_CODE (niter
) == INTEGER_CST
)
960 fprintf (dump_file
, "Loop %d iterates ", loop
->num
);
961 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
962 fprintf (dump_file
, " times.\n");
964 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
967 fprintf (dump_file
, "Loop %d iterates at most %i times.\n", loop
->num
,
971 /* Remove exits that are known to be never taken based on loop bound.
972 Needs to be called after compilation of max_loop_iterations_int that
973 populates the loop bounds. */
974 modified
|= remove_redundant_iv_tests (loop
);
976 if (try_unroll_loop_completely (loop
, exit
, niter
, ul
, maxiter
, locus
))
980 && niter
&& !chrec_contains_undetermined (niter
)
981 && exit
&& just_once_each_iteration_p (loop
, exit
->src
))
982 create_canonical_iv (loop
, exit
, niter
);
987 /* The main entry point of the pass. Adds canonical induction variables
988 to the suitable loops. */
991 canonicalize_induction_variables (void)
994 bool changed
= false;
995 bool irred_invalidated
= false;
996 bitmap loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
998 free_numbers_of_iterations_estimates ();
999 estimate_numbers_of_iterations ();
1001 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
1003 changed
|= canonicalize_loop_induction_variables (loop
,
1004 true, UL_SINGLE_ITER
,
1007 gcc_assert (!need_ssa_update_p (cfun
));
1009 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1010 if (irred_invalidated
1011 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1012 mark_irreducible_loops ();
1014 /* Clean up the information about numbers of iterations, since brute force
1015 evaluation could reveal new information. */
1018 if (!bitmap_empty_p (loop_closed_ssa_invalidated
))
1020 gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA
));
1021 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
1023 BITMAP_FREE (loop_closed_ssa_invalidated
);
1026 return TODO_cleanup_cfg
;
1030 /* Propagate VAL into all uses of SSA_NAME. */
1033 propagate_into_all_uses (tree ssa_name
, tree val
)
1035 imm_use_iterator iter
;
1038 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, ssa_name
)
1040 gimple_stmt_iterator use_stmt_gsi
= gsi_for_stmt (use_stmt
);
1043 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1046 if (is_gimple_assign (use_stmt
)
1047 && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt
))
1048 == GIMPLE_SINGLE_RHS
)
1050 tree rhs
= gimple_assign_rhs1 (use_stmt
);
1052 if (TREE_CODE (rhs
) == ADDR_EXPR
)
1053 recompute_tree_invariant_for_addr_expr (rhs
);
1056 fold_stmt_inplace (&use_stmt_gsi
);
1057 update_stmt (use_stmt
);
1058 maybe_clean_or_replace_eh_stmt (use_stmt
, use_stmt
);
1062 /* Propagate constant SSA_NAMEs defined in basic block BB. */
1065 propagate_constants_for_unrolling (basic_block bb
)
1067 gimple_stmt_iterator gsi
;
1069 /* Look for degenerate PHI nodes with constant argument. */
1070 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); )
1072 gimple phi
= gsi_stmt (gsi
);
1073 tree result
= gimple_phi_result (phi
);
1074 tree arg
= gimple_phi_arg_def (phi
, 0);
1076 if (gimple_phi_num_args (phi
) == 1 && TREE_CODE (arg
) == INTEGER_CST
)
1078 propagate_into_all_uses (result
, arg
);
1079 gsi_remove (&gsi
, true);
1080 release_ssa_name (result
);
1086 /* Look for assignments to SSA names with constant RHS. */
1087 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); )
1089 gimple stmt
= gsi_stmt (gsi
);
1092 if (is_gimple_assign (stmt
)
1093 && gimple_assign_rhs_code (stmt
) == INTEGER_CST
1094 && (lhs
= gimple_assign_lhs (stmt
), TREE_CODE (lhs
) == SSA_NAME
)
1095 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
))
1097 propagate_into_all_uses (lhs
, gimple_assign_rhs1 (stmt
));
1098 gsi_remove (&gsi
, true);
1099 release_ssa_name (lhs
);
1106 /* Process loops from innermost to outer, stopping at the innermost
1107 loop we unrolled. */
1110 tree_unroll_loops_completely_1 (bool may_increase_size
, bool unroll_outer
,
1111 vec
<loop_p
, va_heap
>& father_stack
,
1114 struct loop
*loop_father
;
1115 bool changed
= false;
1117 enum unroll_level ul
;
1119 /* Process inner loops first. */
1120 for (inner
= loop
->inner
; inner
!= NULL
; inner
= inner
->next
)
1121 changed
|= tree_unroll_loops_completely_1 (may_increase_size
,
1122 unroll_outer
, father_stack
,
1125 /* If we changed an inner loop we cannot process outer loops in this
1126 iteration because SSA form is not up-to-date. Continue with
1127 siblings of outer loops instead. */
1131 /* Don't unroll #pragma omp simd loops until the vectorizer
1132 attempts to vectorize those. */
1133 if (loop
->force_vectorize
)
1136 /* Try to unroll this loop. */
1137 loop_father
= loop_outer (loop
);
1141 if (may_increase_size
&& optimize_loop_nest_for_speed_p (loop
)
1142 /* Unroll outermost loops only if asked to do so or they do
1143 not cause code growth. */
1144 && (unroll_outer
|| loop_outer (loop_father
)))
1149 if (canonicalize_loop_induction_variables
1150 (loop
, false, ul
, !flag_tree_loop_ivcanon
))
1152 /* If we'll continue unrolling, we need to propagate constants
1153 within the new basic blocks to fold away induction variable
1154 computations; otherwise, the size might blow up before the
1155 iteration is complete and the IR eventually cleaned up. */
1156 if (loop_outer (loop_father
) && !loop_father
->aux
)
1158 father_stack
.safe_push (loop_father
);
1159 loop_father
->aux
= loop_father
;
1168 /* Unroll LOOPS completely if they iterate just few times. Unless
1169 MAY_INCREASE_SIZE is true, perform the unrolling only if the
1170 size of the code does not increase. */
1173 tree_unroll_loops_completely (bool may_increase_size
, bool unroll_outer
)
1175 auto_vec
<loop_p
, 16> father_stack
;
1178 bool irred_invalidated
= false;
1183 bitmap loop_closed_ssa_invalidated
= NULL
;
1185 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1186 loop_closed_ssa_invalidated
= BITMAP_ALLOC (NULL
);
1188 free_numbers_of_iterations_estimates ();
1189 estimate_numbers_of_iterations ();
1191 changed
= tree_unroll_loops_completely_1 (may_increase_size
,
1192 unroll_outer
, father_stack
,
1193 current_loops
->tree_root
);
1199 /* Be sure to skip unlooped loops while procesing father_stack
1201 FOR_EACH_VEC_ELT (loops_to_unloop
, i
, iter
)
1202 (*iter
)->aux
= NULL
;
1203 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
1206 unloop_loops (loop_closed_ssa_invalidated
, &irred_invalidated
);
1208 /* We can not use TODO_update_ssa_no_phi because VOPS gets confused. */
1209 if (loop_closed_ssa_invalidated
1210 && !bitmap_empty_p (loop_closed_ssa_invalidated
))
1211 rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated
,
1214 update_ssa (TODO_update_ssa
);
1216 /* Propagate the constants within the new basic blocks. */
1217 FOR_EACH_VEC_ELT (father_stack
, i
, iter
)
1221 basic_block
*body
= get_loop_body_in_dom_order (*iter
);
1222 for (j
= 0; j
< (*iter
)->num_nodes
; j
++)
1223 propagate_constants_for_unrolling (body
[j
]);
1225 (*iter
)->aux
= NULL
;
1227 father_stack
.truncate (0);
1229 /* This will take care of removing completely unrolled loops
1230 from the loop structures so we can continue unrolling now
1232 if (cleanup_tree_cfg ())
1233 update_ssa (TODO_update_ssa_only_virtuals
);
1235 /* Clean up the information about numbers of iterations, since
1236 complete unrolling might have invalidated it. */
1238 #ifdef ENABLE_CHECKING
1239 if (loops_state_satisfies_p (LOOP_CLOSED_SSA
))
1240 verify_loop_closed_ssa (true);
1243 if (loop_closed_ssa_invalidated
)
1244 BITMAP_FREE (loop_closed_ssa_invalidated
);
1247 && ++iteration
<= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS
));
1249 father_stack
.release ();
1251 if (irred_invalidated
1252 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
))
1253 mark_irreducible_loops ();
1258 /* Canonical induction variable creation pass. */
1262 const pass_data pass_data_iv_canon
=
1264 GIMPLE_PASS
, /* type */
1265 "ivcanon", /* name */
1266 OPTGROUP_LOOP
, /* optinfo_flags */
1267 true, /* has_execute */
1268 TV_TREE_LOOP_IVCANON
, /* tv_id */
1269 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1270 0, /* properties_provided */
1271 0, /* properties_destroyed */
1272 0, /* todo_flags_start */
1273 0, /* todo_flags_finish */
1276 class pass_iv_canon
: public gimple_opt_pass
1279 pass_iv_canon (gcc::context
*ctxt
)
1280 : gimple_opt_pass (pass_data_iv_canon
, ctxt
)
1283 /* opt_pass methods: */
1284 virtual bool gate (function
*) { return flag_tree_loop_ivcanon
!= 0; }
1285 virtual unsigned int execute (function
*fun
);
1287 }; // class pass_iv_canon
1290 pass_iv_canon::execute (function
*fun
)
1292 if (number_of_loops (fun
) <= 1)
1295 return canonicalize_induction_variables ();
1301 make_pass_iv_canon (gcc::context
*ctxt
)
1303 return new pass_iv_canon (ctxt
);
1306 /* Complete unrolling of loops. */
1310 const pass_data pass_data_complete_unroll
=
1312 GIMPLE_PASS
, /* type */
1313 "cunroll", /* name */
1314 OPTGROUP_LOOP
, /* optinfo_flags */
1315 true, /* has_execute */
1316 TV_COMPLETE_UNROLL
, /* tv_id */
1317 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1318 0, /* properties_provided */
1319 0, /* properties_destroyed */
1320 0, /* todo_flags_start */
1321 0, /* todo_flags_finish */
1324 class pass_complete_unroll
: public gimple_opt_pass
1327 pass_complete_unroll (gcc::context
*ctxt
)
1328 : gimple_opt_pass (pass_data_complete_unroll
, ctxt
)
1331 /* opt_pass methods: */
1332 virtual unsigned int execute (function
*);
1334 }; // class pass_complete_unroll
1337 pass_complete_unroll::execute (function
*fun
)
1339 if (number_of_loops (fun
) <= 1)
1342 return tree_unroll_loops_completely (flag_unroll_loops
1344 || optimize
>= 3, true);
1350 make_pass_complete_unroll (gcc::context
*ctxt
)
1352 return new pass_complete_unroll (ctxt
);
1355 /* Complete unrolling of inner loops. */
1359 const pass_data pass_data_complete_unrolli
=
1361 GIMPLE_PASS
, /* type */
1362 "cunrolli", /* name */
1363 OPTGROUP_LOOP
, /* optinfo_flags */
1364 true, /* has_execute */
1365 TV_COMPLETE_UNROLL
, /* tv_id */
1366 ( PROP_cfg
| PROP_ssa
), /* properties_required */
1367 0, /* properties_provided */
1368 0, /* properties_destroyed */
1369 0, /* todo_flags_start */
1370 0, /* todo_flags_finish */
1373 class pass_complete_unrolli
: public gimple_opt_pass
1376 pass_complete_unrolli (gcc::context
*ctxt
)
1377 : gimple_opt_pass (pass_data_complete_unrolli
, ctxt
)
1380 /* opt_pass methods: */
1381 virtual bool gate (function
*) { return optimize
>= 2; }
1382 virtual unsigned int execute (function
*);
1384 }; // class pass_complete_unrolli
1387 pass_complete_unrolli::execute (function
*fun
)
1391 loop_optimizer_init (LOOPS_NORMAL
1392 | LOOPS_HAVE_RECORDED_EXITS
);
1393 if (number_of_loops (fun
) > 1)
1396 ret
= tree_unroll_loops_completely (optimize
>= 3, false);
1397 free_numbers_of_iterations_estimates ();
1400 loop_optimizer_finalize ();
1408 make_pass_complete_unrolli (gcc::context
*ctxt
)
1410 return new pass_complete_unrolli (ctxt
);