1 /* Loop unrolling and peeling.
2 Copyright (C) 2002 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
29 #include "cfglayout.h"
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with a single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for further passes and in some cases
39 by fewer tests of exit conditions. The main problem is code growth,
40 which impacts performance negatively due to cache effects.
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll a constant number of times; this is almost always
51 a win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll a number of times that we can compute
53 at runtime; we also get rid of exit condition tests here, but there
54 is the extra expense of calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see the comments at
59 the appropriate function below.
61 There are a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
64 ??? A great problem is that we don't have a good way to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance on the order of several %.
69 static void decide_unrolling_and_peeling
PARAMS ((struct loops
*, int));
70 static void peel_loops_completely
PARAMS ((struct loops
*, int));
71 static void decide_peel_simple
PARAMS ((struct loops
*, struct loop
*, int));
72 static void decide_peel_once_rolling
PARAMS ((struct loops
*, struct loop
*, int));
73 static void decide_peel_completely
PARAMS ((struct loops
*, struct loop
*, int));
74 static void decide_unroll_stupid
PARAMS ((struct loops
*, struct loop
*, int));
75 static void decide_unroll_constant_iterations
PARAMS ((struct loops
*, struct loop
*, int));
76 static void decide_unroll_runtime_iterations
PARAMS ((struct loops
*, struct loop
*, int));
77 static void peel_loop_simple
PARAMS ((struct loops
*, struct loop
*));
78 static void peel_loop_completely
PARAMS ((struct loops
*, struct loop
*));
79 static void unroll_loop_stupid
PARAMS ((struct loops
*, struct loop
*));
80 static void unroll_loop_constant_iterations
PARAMS ((struct loops
*,
82 static void unroll_loop_runtime_iterations
PARAMS ((struct loops
*,
85 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
87 unroll_and_peel_loops (loops
, flags
)
91 struct loop
*loop
, *next
;
94 /* First perform complete loop peeling (it is almost surely a win,
95 and strongly affects the parameters for further decisions). */
96 peel_loops_completely (loops
, flags
);
98 /* Now decide rest of unrolling and peeling. */
99 decide_unrolling_and_peeling (loops
, flags
);
101 loop
= loops
->tree_root
;
105 /* Scan the loops, inner ones first. */
106 while (loop
!= loops
->tree_root
)
118 /* And perform the appropriate transformations. */
119 switch (loop
->lpt_decision
.decision
)
121 case LPT_PEEL_COMPLETELY
:
124 case LPT_PEEL_SIMPLE
:
125 peel_loop_simple (loops
, loop
);
127 case LPT_UNROLL_CONSTANT
:
128 unroll_loop_constant_iterations (loops
, loop
);
130 case LPT_UNROLL_RUNTIME
:
131 unroll_loop_runtime_iterations (loops
, loop
);
133 case LPT_UNROLL_STUPID
:
134 unroll_loop_stupid (loops
, loop
);
144 #ifdef ENABLE_CHECKING
145 verify_dominators (loops
->cfg
.dom
);
146 verify_loop_structure (loops
);
153 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
155 peel_loops_completely (loops
, flags
)
159 struct loop
*loop
, *next
;
161 loop
= loops
->tree_root
;
165 while (loop
!= loops
->tree_root
)
176 loop
->lpt_decision
.decision
= LPT_NONE
;
180 fprintf (rtl_dump_file
, ";; Considering loop %d for complete peeling\n",
183 loop
->ninsns
= num_loop_insns (loop
);
185 decide_peel_once_rolling (loops
, loop
, flags
);
186 if (loop
->lpt_decision
.decision
== LPT_NONE
)
187 decide_peel_completely (loops
, loop
, flags
);
189 if (loop
->lpt_decision
.decision
== LPT_PEEL_COMPLETELY
)
191 peel_loop_completely (loops
, loop
);
192 #ifdef ENABLE_CHECKING
193 verify_dominators (loops
->cfg
.dom
);
194 verify_loop_structure (loops
);
201 /* Decide whether to unroll or peel LOOPS (depending on FLAGS) and how much. */
203 decide_unrolling_and_peeling (loops
, flags
)
207 struct loop
*loop
= loops
->tree_root
, *next
;
212 /* Scan the loops, inner ones first. */
213 while (loop
!= loops
->tree_root
)
224 loop
->lpt_decision
.decision
= LPT_NONE
;
227 fprintf (rtl_dump_file
, ";; Considering loop %d\n", loop
->num
);
229 /* Do not peel cold areas. */
230 if (!maybe_hot_bb_p (loop
->header
))
233 fprintf (rtl_dump_file
, ";; Not considering loop, cold area\n");
238 /* Can the loop be manipulated? */
239 if (!can_duplicate_loop_p (loop
))
242 fprintf (rtl_dump_file
,
243 ";; Not considering loop, cannot duplicate\n");
248 /* Skip non-innermost loops. */
252 fprintf (rtl_dump_file
, ";; Not considering loop, is not innermost\n");
257 loop
->ninsns
= num_loop_insns (loop
);
258 loop
->av_ninsns
= average_num_loop_insns (loop
);
260 /* Try transformations one by one in decreasing order of
263 decide_unroll_constant_iterations (loops
, loop
, flags
);
264 if (loop
->lpt_decision
.decision
== LPT_NONE
)
265 decide_unroll_runtime_iterations (loops
, loop
, flags
);
266 if (loop
->lpt_decision
.decision
== LPT_NONE
)
267 decide_unroll_stupid (loops
, loop
, flags
);
268 if (loop
->lpt_decision
.decision
== LPT_NONE
)
269 decide_peel_simple (loops
, loop
, flags
);
275 /* Decide whether the LOOP is once rolling and suitable for complete
278 decide_peel_once_rolling (loops
, loop
, flags
)
281 int flags ATTRIBUTE_UNUSED
;
284 fprintf (rtl_dump_file
, ";; Considering peeling once rolling loop\n");
286 /* Is the loop small enough? */
287 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS
) < loop
->ninsns
)
290 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
294 /* Check for simple loops. */
295 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
298 /* Check number of iterations. */
299 if (!loop
->simple
|| !loop
->desc
.const_iter
|| loop
->desc
.niter
!= 0)
302 fprintf (rtl_dump_file
, ";; Unable to prove that the loop rolls exactly once\n");
308 fprintf (rtl_dump_file
, ";; Decided to peel exactly once rolling loop\n");
309 loop
->lpt_decision
.decision
= LPT_PEEL_COMPLETELY
;
312 /* Decide whether the LOOP is suitable for complete peeling. */
314 decide_peel_completely (loops
, loop
, flags
)
317 int flags ATTRIBUTE_UNUSED
;
322 fprintf (rtl_dump_file
, ";; Considering peeling completely\n");
324 /* Skip non-innermost loops. */
328 fprintf (rtl_dump_file
, ";; Not considering loop, is not innermost\n");
332 /* Do not peel cold areas. */
333 if (!maybe_hot_bb_p (loop
->header
))
336 fprintf (rtl_dump_file
, ";; Not considering loop, cold area\n");
340 /* Can the loop be manipulated? */
341 if (!can_duplicate_loop_p (loop
))
344 fprintf (rtl_dump_file
,
345 ";; Not considering loop, cannot duplicate\n");
349 /* npeel = number of iterations to peel. */
350 npeel
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
) / loop
->ninsns
;
351 if (npeel
> (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
352 npeel
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
);
354 /* Is the loop small enough? */
358 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
362 /* Check for simple loops. */
365 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
369 /* Check number of iterations. */
370 if (!loop
->simple
|| !loop
->desc
.const_iter
)
373 fprintf (rtl_dump_file
, ";; Unable to prove that the loop iterates constant times\n");
377 if (loop
->desc
.niter
> npeel
- 1)
381 fprintf (rtl_dump_file
, ";; Not peeling loop completely, rolls too much (");
382 fprintf (rtl_dump_file
, HOST_WIDEST_INT_PRINT_DEC
,(HOST_WIDEST_INT
) loop
->desc
.niter
);
383 fprintf (rtl_dump_file
, " iterations > %d [maximum peelings])\n", npeel
);
390 fprintf (rtl_dump_file
, ";; Decided to peel loop completely\n");
391 loop
->lpt_decision
.decision
= LPT_PEEL_COMPLETELY
;
394 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
395 completely. The transformation done:
397 for (i = 0; i < 4; i++)
409 peel_loop_completely (loops
, loop
)
414 unsigned HOST_WIDE_INT npeel
;
415 unsigned n_remove_edges
, i
;
417 struct loop_desc
*desc
= &loop
->desc
;
423 wont_exit
= sbitmap_alloc (npeel
+ 1);
424 sbitmap_ones (wont_exit
);
425 RESET_BIT (wont_exit
, 0);
426 if (desc
->may_be_zero
)
427 RESET_BIT (wont_exit
, 1);
429 remove_edges
= xcalloc (npeel
, sizeof (edge
));
432 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
434 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
435 DLTHE_FLAG_UPDATE_FREQ
))
440 /* Remove the exit edges. */
441 for (i
= 0; i
< n_remove_edges
; i
++)
442 remove_path (loops
, remove_edges
[i
]);
446 /* Now remove the unreachable part of the last iteration and cancel
448 remove_path (loops
, desc
->in_edge
);
451 fprintf (rtl_dump_file
, ";; Peeled loop completely, %d times\n", (int) npeel
);
454 /* Decide whether to unroll LOOP iterating constant number of times and how much. */
456 decide_unroll_constant_iterations (loops
, loop
, flags
)
461 unsigned nunroll
, nunroll_by_av
, best_copies
, best_unroll
= -1, n_copies
, i
;
463 if (!(flags
& UAP_UNROLL
))
465 /* We were not asked to, just return back silently. */
470 fprintf (rtl_dump_file
, ";; Considering unrolling loop with constant number of iterations\n");
472 /* nunroll = total number of copies of the original loop body in
473 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
474 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
475 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
476 if (nunroll
> nunroll_by_av
)
477 nunroll
= nunroll_by_av
;
478 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
479 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
481 /* Skip big loops. */
485 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
489 /* Check for simple loops. */
492 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
496 /* Check number of iterations. */
497 if (!loop
->simple
|| !loop
->desc
.const_iter
)
500 fprintf (rtl_dump_file
, ";; Unable to prove that the loop iterates constant times\n");
504 /* Check whether the loop rolls enough to consider. */
505 if (loop
->desc
.niter
< 2 * nunroll
)
508 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
512 /* Success; now compute number of iterations to unroll. We alter
513 nunroll so that as few copies of the loop body as possible are
514 necessary, while still not decreasing the number of unrollings
515 too much (at most by 1). */
516 best_copies
= 2 * nunroll
+ 10;
519 if ((unsigned) i
- 1 >= loop
->desc
.niter
)
520 i
= loop
->desc
.niter
- 2;
522 for (; i
>= nunroll
- 1; i
--)
524 unsigned exit_mod
= loop
->desc
.niter
% (i
+ 1);
526 if (loop
->desc
.postincr
)
527 n_copies
= exit_mod
+ i
+ 1;
528 else if (exit_mod
!= (unsigned) i
|| loop
->desc
.may_be_zero
)
529 n_copies
= exit_mod
+ i
+ 2;
533 if (n_copies
< best_copies
)
535 best_copies
= n_copies
;
541 fprintf (rtl_dump_file
, ";; max_unroll %d (%d copies, initial %d).\n",
542 best_unroll
+ 1, best_copies
, nunroll
);
544 loop
->lpt_decision
.decision
= LPT_UNROLL_CONSTANT
;
545 loop
->lpt_decision
.times
= best_unroll
;
548 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
549 times. The transformation does this:
551 for (i = 0; i < 102; i++)
568 unroll_loop_constant_iterations (loops
, loop
)
572 unsigned HOST_WIDE_INT niter
;
575 unsigned n_remove_edges
, i
;
577 unsigned max_unroll
= loop
->lpt_decision
.times
;
578 struct loop_desc
*desc
= &loop
->desc
;
582 if (niter
<= (unsigned) max_unroll
+ 1)
583 abort (); /* Should not get here (such loop should be peeled instead). */
585 exit_mod
= niter
% (max_unroll
+ 1);
587 wont_exit
= sbitmap_alloc (max_unroll
+ 1);
588 sbitmap_ones (wont_exit
);
590 remove_edges
= xcalloc (max_unroll
+ exit_mod
+ 1, sizeof (edge
));
595 /* Counter is incremented after the exit test; leave exit test
596 in the first copy, so that the loops that start with test
597 of exit condition have continuous body after unrolling. */
600 fprintf (rtl_dump_file
, ";; Condition on beginning of loop.\n");
602 /* Peel exit_mod iterations. */
603 RESET_BIT (wont_exit
, 0);
604 if (desc
->may_be_zero
)
605 RESET_BIT (wont_exit
, 1);
608 && !duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
610 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
611 DLTHE_FLAG_UPDATE_FREQ
))
614 SET_BIT (wont_exit
, 1);
618 /* Leave exit test in last copy, for the same reason as above if
619 the loop tests the condition at the end of loop body. */
622 fprintf (rtl_dump_file
, ";; Condition on end of loop.\n");
624 /* We know that niter >= max_unroll + 2; so we do not need to care about
625 the case when we would exit before reaching the loop. So just peel
626 exit_mod + 1 iterations.
628 if (exit_mod
!= (unsigned) max_unroll
|| desc
->may_be_zero
)
630 RESET_BIT (wont_exit
, 0);
631 if (desc
->may_be_zero
)
632 RESET_BIT (wont_exit
, 1);
634 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
636 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
637 DLTHE_FLAG_UPDATE_FREQ
))
640 SET_BIT (wont_exit
, 0);
641 SET_BIT (wont_exit
, 1);
644 RESET_BIT (wont_exit
, max_unroll
);
647 /* Now unroll the loop. */
648 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
650 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
651 DLTHE_FLAG_UPDATE_FREQ
))
656 /* Remove the edges. */
657 for (i
= 0; i
< n_remove_edges
; i
++)
658 remove_path (loops
, remove_edges
[i
]);
662 fprintf (rtl_dump_file
, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll
, num_loop_insns (loop
));
665 /* Decide whether to unroll LOOP iterating a runtime-computable number of times
668 decide_unroll_runtime_iterations (loops
, loop
, flags
)
673 unsigned nunroll
, nunroll_by_av
, i
;
675 if (!(flags
& UAP_UNROLL
))
677 /* We were not asked to, just return back silently. */
682 fprintf (rtl_dump_file
, ";; Considering unrolling loop with runtime computable number of iterations\n");
684 /* nunroll = total number of copies of the original loop body in
685 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
686 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
687 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
688 if (nunroll
> nunroll_by_av
)
689 nunroll
= nunroll_by_av
;
690 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
691 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
693 /* Skip big loops. */
697 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
701 /* Check for simple loops. */
704 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
708 /* Check simpleness. */
712 fprintf (rtl_dump_file
, ";; Unable to prove that the number of iterations can be counted in runtime\n");
716 if (loop
->desc
.const_iter
)
719 fprintf (rtl_dump_file
, ";; Loop iterates constant times\n");
723 /* If we have profile feedback, check whether the loop rolls. */
724 if (loop
->header
->count
&& expected_loop_iterations (loop
) < 2 * nunroll
)
727 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
731 /* Success; now force nunroll to be power of 2, as we are unable to
732 cope with overflows in computation of number of iterations. */
733 for (i
= 1; 2 * i
<= nunroll
; i
*= 2);
735 loop
->lpt_decision
.decision
= LPT_UNROLL_RUNTIME
;
736 loop
->lpt_decision
.times
= i
- 1;
739 /* Unroll LOOP, for which we are able to count the number of iterations at runtime,
740 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
741 extra care for case n < 0):
743 for (i = 0; i < n; i++)
771 unroll_loop_runtime_iterations (loops
, loop
)
775 rtx niter
, init_code
, branch_code
, jump
, label
;
777 basic_block preheader
, *body
, *dom_bbs
, swtch
, ezc_swtch
;
781 unsigned n_peel
, n_remove_edges
;
782 edge
*remove_edges
, e
;
783 bool extra_zero_check
, last_may_exit
;
784 unsigned max_unroll
= loop
->lpt_decision
.times
;
785 struct loop_desc
*desc
= &loop
->desc
;
787 /* Remember blocks whose dominators will have to be updated. */
788 dom_bbs
= xcalloc (n_basic_blocks
, sizeof (basic_block
));
791 body
= get_loop_body (loop
);
792 for (i
= 0; i
< loop
->num_nodes
; i
++)
797 nldom
= get_dominated_by (loops
->cfg
.dom
, body
[i
], &ldom
);
798 for (j
= 0; j
< nldom
; j
++)
799 if (!flow_bb_inside_loop_p (loop
, ldom
[j
]))
800 dom_bbs
[n_dom_bbs
++] = ldom
[j
];
808 /* Leave exit in first copy (for explanation why see comment in
809 unroll_loop_constant_iterations). */
811 n_peel
= max_unroll
- 1;
812 extra_zero_check
= true;
813 last_may_exit
= false;
817 /* Leave exit in last copy (for explanation why see comment in
818 unroll_loop_constant_iterations). */
819 may_exit_copy
= max_unroll
;
821 extra_zero_check
= false;
822 last_may_exit
= true;
825 /* Get expression for number of iterations. */
827 niter
= count_loop_iterations (desc
, NULL
, NULL
);
830 niter
= force_operand (niter
, NULL
);
832 /* Count modulo by ANDing it with max_unroll; we use the fact that
833 the number of unrollings is a power of two, and thus this is correct
834 even if there is overflow in the computation. */
835 niter
= expand_simple_binop (GET_MODE (desc
->var
), AND
,
837 GEN_INT (max_unroll
),
838 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
840 init_code
= get_insns ();
843 /* Precondition the loop. */
844 loop_split_edge_with (loop_preheader_edge (loop
), init_code
, loops
);
846 remove_edges
= xcalloc (max_unroll
+ n_peel
+ 1, sizeof (edge
));
849 wont_exit
= sbitmap_alloc (max_unroll
+ 2);
851 /* Peel the first copy of loop body (almost always we must leave exit test
852 here; the only exception is when we have extra zero check and the number
853 of iterations is reliable (i.e. comes out of NE condition)). Also record
854 the place of (possible) extra zero check. */
855 sbitmap_zero (wont_exit
);
856 if (extra_zero_check
&& desc
->cond
== NE
)
857 SET_BIT (wont_exit
, 1);
858 ezc_swtch
= loop_preheader_edge (loop
)->src
;
859 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
861 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
862 DLTHE_FLAG_UPDATE_FREQ
))
865 /* Record the place where switch will be built for preconditioning. */
866 swtch
= loop_split_edge_with (loop_preheader_edge (loop
),
869 for (i
= 0; i
< n_peel
; i
++)
872 sbitmap_zero (wont_exit
);
873 if (i
!= n_peel
- 1 || !last_may_exit
)
874 SET_BIT (wont_exit
, 1);
875 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
877 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
878 DLTHE_FLAG_UPDATE_FREQ
))
883 /* Create item for switch. */
884 j
= n_peel
- i
- (extra_zero_check
? 0 : 1);
885 p
= REG_BR_PROB_BASE
/ (i
+ 2);
887 preheader
= loop_split_edge_with (loop_preheader_edge (loop
),
889 label
= block_label (preheader
);
891 do_compare_rtx_and_jump (copy_rtx (niter
), GEN_INT (j
), EQ
, 0,
892 GET_MODE (desc
->var
), NULL_RTX
, NULL_RTX
,
894 jump
= get_last_insn ();
895 JUMP_LABEL (jump
) = label
;
897 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
898 GEN_INT (p
), REG_NOTES (jump
));
900 LABEL_NUSES (label
)++;
901 branch_code
= get_insns ();
904 swtch
= loop_split_edge_with (swtch
->pred
, branch_code
, loops
);
905 set_immediate_dominator (loops
->cfg
.dom
, preheader
, swtch
);
906 swtch
->succ
->probability
= REG_BR_PROB_BASE
- p
;
907 e
= make_edge (swtch
, preheader
,
908 swtch
->succ
->flags
& EDGE_IRREDUCIBLE_LOOP
);
913 if (extra_zero_check
)
915 /* Add branch for zero iterations. */
916 p
= REG_BR_PROB_BASE
/ (max_unroll
+ 1);
918 preheader
= loop_split_edge_with (loop_preheader_edge (loop
),
920 label
= block_label (preheader
);
922 do_compare_rtx_and_jump (copy_rtx (niter
), const0_rtx
, EQ
, 0,
923 GET_MODE (desc
->var
), NULL_RTX
, NULL_RTX
,
925 jump
= get_last_insn ();
926 JUMP_LABEL (jump
) = label
;
928 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
929 GEN_INT (p
), REG_NOTES (jump
));
931 LABEL_NUSES (label
)++;
932 branch_code
= get_insns ();
935 swtch
= loop_split_edge_with (swtch
->succ
, branch_code
, loops
);
936 set_immediate_dominator (loops
->cfg
.dom
, preheader
, swtch
);
937 swtch
->succ
->probability
= REG_BR_PROB_BASE
- p
;
938 e
= make_edge (swtch
, preheader
,
939 swtch
->succ
->flags
& EDGE_IRREDUCIBLE_LOOP
);
943 /* Recount dominators for outer blocks. */
944 iterate_fix_dominators (loops
->cfg
.dom
, dom_bbs
, n_dom_bbs
);
946 /* And unroll loop. */
948 sbitmap_ones (wont_exit
);
949 RESET_BIT (wont_exit
, may_exit_copy
);
951 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
953 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
954 DLTHE_FLAG_UPDATE_FREQ
))
959 /* Remove the edges. */
960 for (i
= 0; i
< n_remove_edges
; i
++)
961 remove_path (loops
, remove_edges
[i
]);
965 fprintf (rtl_dump_file
,
966 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
967 max_unroll
, num_loop_insns (loop
));
970 /* Decide whether to simply peel LOOP and how much. */
972 decide_peel_simple (loops
, loop
, flags
)
979 if (!(flags
& UAP_PEEL
))
981 /* We were not asked to, just return back silently. */
986 fprintf (rtl_dump_file
, ";; Considering simply peeling loop\n");
988 /* npeel = number of iterations to peel. */
989 npeel
= PARAM_VALUE (PARAM_MAX_PEELED_INSNS
) / loop
->ninsns
;
990 if (npeel
> (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES
))
991 npeel
= PARAM_VALUE (PARAM_MAX_PEEL_TIMES
);
993 /* Skip big loops. */
997 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
1001 /* Check for simple loops. */
1002 if (!loop
->has_desc
)
1004 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
1008 /* Check number of iterations. */
1009 if (loop
->simple
&& loop
->desc
.const_iter
)
1012 fprintf (rtl_dump_file
, ";; Loop iterates constant times\n");
1016 /* Do not simply peel loops with branches inside -- it increases number
1018 if (loop
->desc
.n_branches
> 1)
1021 fprintf (rtl_dump_file
, ";; Not peeling, contains branches\n");
1025 if (loop
->header
->count
)
1027 unsigned niter
= expected_loop_iterations (loop
);
1028 if (niter
+ 1 > npeel
)
1032 fprintf (rtl_dump_file
, ";; Not peeling loop, rolls too much (");
1033 fprintf (rtl_dump_file
, HOST_WIDEST_INT_PRINT_DEC
, (HOST_WIDEST_INT
) (niter
+ 1));
1034 fprintf (rtl_dump_file
, " iterations > %d [maximum peelings])\n", npeel
);
1042 /* For now we have no good heuristics to decide whether loop peeling
1043 will be effective, so disable it. */
1045 fprintf (rtl_dump_file
,
1046 ";; Not peeling loop, no evidence it will be profitable\n");
1051 loop
->lpt_decision
.decision
= LPT_PEEL_SIMPLE
;
1052 loop
->lpt_decision
.times
= npeel
;
1055 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1061 if (!cond) goto end;
1063 if (!cond) goto end;
1070 peel_loop_simple (loops
, loop
)
1071 struct loops
*loops
;
1075 unsigned npeel
= loop
->lpt_decision
.times
;
1077 wont_exit
= sbitmap_alloc (npeel
+ 1);
1078 sbitmap_zero (wont_exit
);
1080 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
1081 loops
, npeel
, wont_exit
, NULL
, NULL
, NULL
,
1082 DLTHE_FLAG_UPDATE_FREQ
))
1088 fprintf (rtl_dump_file
, ";; Peeling loop %d times\n", npeel
);
1091 /* Decide whether to unroll LOOP stupidly and how much. */
1093 decide_unroll_stupid (loops
, loop
, flags
)
1094 struct loops
*loops
;
1098 unsigned nunroll
, nunroll_by_av
, i
;
1100 if (!(flags
& UAP_UNROLL_ALL
))
1102 /* We were not asked to, just return back silently. */
1107 fprintf (rtl_dump_file
, ";; Considering unrolling loop stupidly\n");
1109 /* nunroll = total number of copies of the original loop body in
1110 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
1111 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
1112 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
1113 if (nunroll
> nunroll_by_av
)
1114 nunroll
= nunroll_by_av
;
1115 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
1116 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1118 /* Skip big loops. */
1122 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
1126 /* Check for simple loops. */
1127 if (!loop
->has_desc
)
1129 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
1133 /* Check simpleness. */
1137 fprintf (rtl_dump_file
, ";; The loop is simple\n");
1141 /* Do not unroll loops with branches inside -- it increases number
1143 if (loop
->desc
.n_branches
> 1)
1146 fprintf (rtl_dump_file
, ";; Not unrolling, contains branches\n");
1150 /* If we have profile feedback, check whether the loop rolls. */
1151 if (loop
->header
->count
&& expected_loop_iterations (loop
) < 2 * nunroll
)
1154 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
1158 /* Success. Now force nunroll to be power of 2, as it seems that this
1159 improves results (partially because of better alignments, partially
1160 because of some dark magic). */
1161 for (i
= 1; 2 * i
<= nunroll
; i
*= 2);
1163 loop
->lpt_decision
.decision
= LPT_UNROLL_STUPID
;
1164 loop
->lpt_decision
.times
= i
- 1;
1167 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1185 unroll_loop_stupid (loops
, loop
)
1186 struct loops
*loops
;
1190 unsigned nunroll
= loop
->lpt_decision
.times
;
1192 wont_exit
= sbitmap_alloc (nunroll
+ 1);
1193 sbitmap_zero (wont_exit
);
1195 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
1196 loops
, nunroll
, wont_exit
, NULL
, NULL
, NULL
,
1197 DLTHE_FLAG_UPDATE_FREQ
))
1203 fprintf (rtl_dump_file
, ";; Unrolled loop %d times, %i insns\n",
1204 nunroll
, num_loop_insns (loop
));