1 /* Loop unrolling and peeling.
2 Copyright (C) 2002 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
29 #include "cfglayout.h"
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with a single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain comes from more sequential
38 code, better code for further passes to optimize, and in some cases
39 fewer tests of exit conditions. The main problem is code growth,
40 which impacts performance negatively due to cache effects.
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 a win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll a number of times that we can compute
53 at run time; we also get rid of exit condition tests here, but there
54 is the extra expense of calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see the comments at
59 the appropriate functions below.
61 There are a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
64 ??? A great problem is that we don't have a good way to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance on the order of several %.
69 static void decide_unrolling_and_peeling
PARAMS ((struct loops
*, int));
70 static void peel_loops_completely
PARAMS ((struct loops
*, int));
71 static void decide_peel_simple
PARAMS ((struct loops
*, struct loop
*, int));
72 static void decide_peel_once_rolling
PARAMS ((struct loops
*, struct loop
*, int));
73 static void decide_peel_completely
PARAMS ((struct loops
*, struct loop
*, int));
74 static void decide_unroll_stupid
PARAMS ((struct loops
*, struct loop
*, int));
75 static void decide_unroll_constant_iterations
PARAMS ((struct loops
*, struct loop
*, int));
76 static void decide_unroll_runtime_iterations
PARAMS ((struct loops
*, struct loop
*, int));
77 static void peel_loop_simple
PARAMS ((struct loops
*, struct loop
*));
78 static void peel_loop_completely
PARAMS ((struct loops
*, struct loop
*));
79 static void unroll_loop_stupid
PARAMS ((struct loops
*, struct loop
*));
80 static void unroll_loop_constant_iterations
PARAMS ((struct loops
*,
82 static void unroll_loop_runtime_iterations
PARAMS ((struct loops
*,
85 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
87 unroll_and_peel_loops (loops
, flags
)
91 struct loop
*loop
, *next
;
94 /* First perform complete loop peeling (it is almost surely a win,
95 and affects parameters for further decisions a lot). */
96 peel_loops_completely (loops
, flags
);
98 /* Now decide rest of unrolling and peeling. */
99 decide_unrolling_and_peeling (loops
, flags
);
101 loop
= loops
->tree_root
;
105 /* Scan the loops, inner ones first. */
106 while (loop
!= loops
->tree_root
)
118 /* And perform the appropriate transformations. */
119 switch (loop
->lpt_decision
.decision
)
121 case LPT_PEEL_COMPLETELY
:
124 case LPT_PEEL_SIMPLE
:
125 peel_loop_simple (loops
, loop
);
127 case LPT_UNROLL_CONSTANT
:
128 unroll_loop_constant_iterations (loops
, loop
);
130 case LPT_UNROLL_RUNTIME
:
131 unroll_loop_runtime_iterations (loops
, loop
);
133 case LPT_UNROLL_STUPID
:
134 unroll_loop_stupid (loops
, loop
);
144 #ifdef ENABLE_CHECKING
145 verify_dominators (loops
->cfg
.dom
);
146 verify_loop_structure (loops
);
153 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
155 peel_loops_completely (loops
, flags
)
159 struct loop
*loop
, *next
;
161 loop
= loops
->tree_root
;
165 while (loop
!= loops
->tree_root
)
176 loop
->lpt_decision
.decision
= LPT_NONE
;
180 fprintf (rtl_dump_file
, ";; Considering loop %d for complete peeling\n",
183 loop
->ninsns
= num_loop_insns (loop
);
185 decide_peel_once_rolling (loops
, loop
, flags
);
186 if (loop
->lpt_decision
.decision
== LPT_NONE
)
187 decide_peel_completely (loops
, loop
, flags
);
189 if (loop
->lpt_decision
.decision
== LPT_PEEL_COMPLETELY
)
191 peel_loop_completely (loops
, loop
);
192 #ifdef ENABLE_CHECKING
193 verify_dominators (loops
->cfg
.dom
);
194 verify_loop_structure (loops
);
201 /* Decide whether to unroll or peel LOOPS (depending on FLAGS) and how much. */
203 decide_unrolling_and_peeling (loops
, flags
)
207 struct loop
*loop
= loops
->tree_root
, *next
;
212 /* Scan the loops, inner ones first. */
213 while (loop
!= loops
->tree_root
)
224 loop
->lpt_decision
.decision
= LPT_NONE
;
227 fprintf (rtl_dump_file
, ";; Considering loop %d\n", loop
->num
);
229 /* Do not peel cold areas. */
230 if (!maybe_hot_bb_p (loop
->header
))
233 fprintf (rtl_dump_file
, ";; Not considering loop, cold area\n");
238 /* Can the loop be manipulated? */
239 if (!can_duplicate_loop_p (loop
))
242 fprintf (rtl_dump_file
,
243 ";; Not considering loop, cannot duplicate\n");
248 /* Skip non-innermost loops. */
252 fprintf (rtl_dump_file
, ";; Not considering loop, is not innermost\n");
257 loop
->ninsns
= num_loop_insns (loop
);
258 loop
->av_ninsns
= average_num_loop_insns (loop
);
260 /* Try transformations one by one in decreasing order of
263 decide_unroll_constant_iterations (loops
, loop
, flags
);
264 if (loop
->lpt_decision
.decision
== LPT_NONE
)
265 decide_unroll_runtime_iterations (loops
, loop
, flags
);
266 if (loop
->lpt_decision
.decision
== LPT_NONE
)
267 decide_unroll_stupid (loops
, loop
, flags
);
268 if (loop
->lpt_decision
.decision
== LPT_NONE
)
269 decide_peel_simple (loops
, loop
, flags
);
275 /* Decide whether the LOOP is once rolling and suitable for complete
278 decide_peel_once_rolling (loops
, loop
, flags
)
281 int flags ATTRIBUTE_UNUSED
;
284 fprintf (rtl_dump_file
, ";; Considering peeling once rolling loop\n");
286 /* Is the loop small enough? */
287 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS
) < loop
->ninsns
)
290 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
294 /* Check for simple loops. */
295 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
298 /* Check number of iterations. */
299 if (!loop
->simple
|| !loop
->desc
.const_iter
|| loop
->desc
.niter
!=0)
302 fprintf (rtl_dump_file
, ";; Unable to prove that the loop rolls exactly once\n");
308 fprintf (rtl_dump_file
, ";; Decided to peel exactly once rolling loop\n");
309 loop
->lpt_decision
.decision
= LPT_PEEL_COMPLETELY
;
312 /* Decide whether the LOOP is suitable for complete peeling. */
314 decide_peel_completely (loops
, loop
, flags
)
317 int flags ATTRIBUTE_UNUSED
;
322 fprintf (rtl_dump_file
, ";; Considering peeling completely\n");
324 /* Skip non-innermost loops. */
328 fprintf (rtl_dump_file
, ";; Not considering loop, is not innermost\n");
332 /* Do not peel cold areas. */
333 if (!maybe_hot_bb_p (loop
->header
))
336 fprintf (rtl_dump_file
, ";; Not considering loop, cold area\n");
340 /* Can the loop be manipulated? */
341 if (!can_duplicate_loop_p (loop
))
344 fprintf (rtl_dump_file
,
345 ";; Not considering loop, cannot duplicate\n");
349 /* npeel = number of iterations to peel. */
350 npeel
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS
) / loop
->ninsns
;
351 if (npeel
> (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
352 npeel
= PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
);
354 /* Is the loop small enough? */
358 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
362 /* Check for simple loops. */
364 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
366 /* Check number of iterations. */
367 if (!loop
->simple
|| !loop
->desc
.const_iter
)
370 fprintf (rtl_dump_file
, ";; Unable to prove that the loop iterates constant times\n");
374 if (loop
->desc
.niter
> npeel
- 1)
378 fprintf (rtl_dump_file
, ";; Not peeling loop completely, rolls too much (");
379 fprintf (rtl_dump_file
, HOST_WIDEST_INT_PRINT_DEC
,(HOST_WIDEST_INT
) loop
->desc
.niter
);
380 fprintf (rtl_dump_file
, "iterations > %d [maximum peelings])\n", npeel
);
387 fprintf (rtl_dump_file
, ";; Decided to peel loop completely\n");
388 loop
->lpt_decision
.decision
= LPT_PEEL_COMPLETELY
;
391 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
392 completely. The transformation done:
394 for (i = 0; i < 4; i++)
406 peel_loop_completely (loops
, loop
)
411 unsigned HOST_WIDE_INT npeel
;
412 unsigned n_remove_edges
, i
;
414 struct loop_desc
*desc
= &loop
->desc
;
420 wont_exit
= sbitmap_alloc (npeel
+ 1);
421 sbitmap_ones (wont_exit
);
422 RESET_BIT (wont_exit
, 0);
423 if (desc
->may_be_zero
)
424 RESET_BIT (wont_exit
, 1);
426 remove_edges
= xcalloc (npeel
, sizeof (edge
));
429 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
431 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
432 DLTHE_FLAG_UPDATE_FREQ
))
437 /* Remove the exit edges. */
438 for (i
= 0; i
< n_remove_edges
; i
++)
439 remove_path (loops
, remove_edges
[i
]);
443 /* Now remove the unreachable part of the last iteration and cancel
445 remove_path (loops
, desc
->in_edge
);
448 fprintf (rtl_dump_file
, ";; Peeled loop completely, %d times\n", (int) npeel
);
451 /* Decide whether to unroll LOOP iterating constant number of times and how much. */
453 decide_unroll_constant_iterations (loops
, loop
, flags
)
458 unsigned nunroll
, nunroll_by_av
, best_copies
, best_unroll
= -1, n_copies
, i
;
460 if (!(flags
& UAP_UNROLL
))
462 /* We were not asked to, just return back silently. */
467 fprintf (rtl_dump_file
, ";; Considering unrolling loop with constant number of iterations\n");
469 /* nunroll = total number of copies of the original loop body in
470 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
471 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
472 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
473 if (nunroll
> nunroll_by_av
)
474 nunroll
= nunroll_by_av
;
475 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
476 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
478 /* Skip big loops. */
482 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
486 /* Check for simple loops. */
488 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
490 /* Check number of iterations. */
491 if (!loop
->simple
|| !loop
->desc
.const_iter
)
494 fprintf (rtl_dump_file
, ";; Unable to prove that the loop iterates constant times\n");
498 /* Check whether the loop rolls enough to consider. */
499 if (loop
->desc
.niter
< 2 * nunroll
)
502 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
506 /* Success; now compute number of iterations to unroll. We alter
507 nunroll so that as few copies of loop body as possible are
508 necessary, while still not decreasing the number of unrollings
509 too much (at most by 1). */
510 best_copies
= 2 * nunroll
+ 10;
513 if ((unsigned) i
- 1 >= loop
->desc
.niter
)
514 i
= loop
->desc
.niter
- 2;
516 for (; i
>= nunroll
- 1; i
--)
518 unsigned exit_mod
= loop
->desc
.niter
% (i
+ 1);
520 if (loop
->desc
.postincr
)
521 n_copies
= exit_mod
+ i
+ 1;
522 else if (exit_mod
!= (unsigned) i
|| loop
->desc
.may_be_zero
)
523 n_copies
= exit_mod
+ i
+ 2;
527 if (n_copies
< best_copies
)
529 best_copies
= n_copies
;
535 fprintf (rtl_dump_file
, ";; max_unroll %d (%d copies, initial %d).\n",
536 best_unroll
+ 1, best_copies
, nunroll
);
538 loop
->lpt_decision
.decision
= LPT_UNROLL_CONSTANT
;
539 loop
->lpt_decision
.times
= best_unroll
;
542 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
543 times. The transformation does this:
545 for (i = 0; i < 102; i++)
562 unroll_loop_constant_iterations (loops
, loop
)
566 unsigned HOST_WIDE_INT niter
;
569 unsigned n_remove_edges
, i
;
571 unsigned max_unroll
= loop
->lpt_decision
.times
;
572 struct loop_desc
*desc
= &loop
->desc
;
576 if (niter
<= (unsigned) max_unroll
+ 1)
577 abort (); /* Should not get here (such loop should be peeled instead). */
579 exit_mod
= niter
% (max_unroll
+ 1);
581 wont_exit
= sbitmap_alloc (max_unroll
+ 1);
582 sbitmap_ones (wont_exit
);
584 remove_edges
= xcalloc (max_unroll
+ exit_mod
+ 1, sizeof (edge
));
589 /* Counter is incremented after the exit test; leave exit test
590 in the first copy, so that the loops that start with test
591 of exit condition have continuous body after unrolling. */
594 fprintf (rtl_dump_file
, ";; Condition on beginning of loop.\n");
596 /* Peel exit_mod iterations. */
597 RESET_BIT (wont_exit
, 0);
598 if (desc
->may_be_zero
)
599 RESET_BIT (wont_exit
, 1);
602 && !duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
604 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
605 DLTHE_FLAG_UPDATE_FREQ
))
608 SET_BIT (wont_exit
, 1);
612 /* Leave exit test in last copy, for the same reason as above if
613 the loop tests the condition at the end of loop body. */
616 fprintf (rtl_dump_file
, ";; Condition on end of loop.\n");
618 /* We know that niter >= max_unroll + 2; so we do not need to care about
619 the case when we would exit before reaching the loop. So just peel
620 exit_mod + 1 iterations.
622 if (exit_mod
!= (unsigned) max_unroll
|| desc
->may_be_zero
)
624 RESET_BIT (wont_exit
, 0);
625 if (desc
->may_be_zero
)
626 RESET_BIT (wont_exit
, 1);
628 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
630 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
631 DLTHE_FLAG_UPDATE_FREQ
))
634 SET_BIT (wont_exit
, 0);
635 SET_BIT (wont_exit
, 1);
638 RESET_BIT (wont_exit
, max_unroll
);
641 /* Now unroll the loop. */
642 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
644 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
645 DLTHE_FLAG_UPDATE_FREQ
))
650 /* Remove the edges. */
651 for (i
= 0; i
< n_remove_edges
; i
++)
652 remove_path (loops
, remove_edges
[i
]);
656 fprintf (rtl_dump_file
, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll
, num_loop_insns (loop
));
659 /* Decide whether to unroll LOOP iterating runtime computable number of times
662 decide_unroll_runtime_iterations (loops
, loop
, flags
)
667 unsigned nunroll
, nunroll_by_av
, i
;
669 if (!(flags
& UAP_UNROLL
))
671 /* We were not asked to, just return back silently. */
676 fprintf (rtl_dump_file
, ";; Considering unrolling loop with runtime computable number of iterations\n");
678 /* nunroll = total number of copies of the original loop body in
679 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
680 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
681 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
682 if (nunroll
> nunroll_by_av
)
683 nunroll
= nunroll_by_av
;
684 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
685 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
687 /* Skip big loops. */
691 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
695 /* Check for simple loops. */
697 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
699 /* Check simpleness. */
703 fprintf (rtl_dump_file
, ";; Unable to prove that the number of iterations can be counted in runtime\n");
707 if (loop
->desc
.const_iter
)
710 fprintf (rtl_dump_file
, ";; Loop iterates constant times\n");
714 /* If we have profile feedback, check whether the loop rolls. */
715 if (loop
->header
->count
&& expected_loop_iterations (loop
) < 2 * nunroll
)
718 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
722 /* Success; now force nunroll to be power of 2, as we are unable to
723 cope with overflows in computation of number of iterations. */
724 for (i
= 1; 2 * i
<= nunroll
; i
*= 2);
726 loop
->lpt_decision
.decision
= LPT_UNROLL_RUNTIME
;
727 loop
->lpt_decision
.times
= i
- 1;
730 /* Unroll LOOP, for which we are able to count the number of iterations at
731 run time, LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
732 extra care for case n < 0):
734 for (i = 0; i < n; i++)
762 unroll_loop_runtime_iterations (loops
, loop
)
766 rtx niter
, init_code
, branch_code
, jump
, label
;
768 basic_block preheader
, *body
, *dom_bbs
, swtch
, ezc_swtch
;
772 unsigned n_peel
, n_remove_edges
;
773 edge
*remove_edges
, e
;
774 bool extra_zero_check
, last_may_exit
;
775 unsigned max_unroll
= loop
->lpt_decision
.times
;
776 struct loop_desc
*desc
= &loop
->desc
;
778 /* Remember blocks whose dominators will have to be updated. */
779 dom_bbs
= xcalloc (n_basic_blocks
, sizeof (basic_block
));
782 body
= get_loop_body (loop
);
783 for (i
= 0; i
< loop
->num_nodes
; i
++)
788 nldom
= get_dominated_by (loops
->cfg
.dom
, body
[i
], &ldom
);
789 for (j
= 0; j
< nldom
; j
++)
790 if (!flow_bb_inside_loop_p (loop
, ldom
[j
]))
791 dom_bbs
[n_dom_bbs
++] = ldom
[j
];
799 /* Leave exit in first copy (for explanation why see comment in
800 unroll_loop_constant_iterations). */
802 n_peel
= max_unroll
- 1;
803 extra_zero_check
= true;
804 last_may_exit
= false;
808 /* Leave exit in last copy (for explanation why see comment in
809 unroll_loop_constant_iterations). */
810 may_exit_copy
= max_unroll
;
812 extra_zero_check
= false;
813 last_may_exit
= true;
816 /* Get expression for number of iterations. */
818 niter
= count_loop_iterations (desc
, NULL
, NULL
);
821 niter
= force_operand (niter
, NULL
);
823 /* Count modulo by ANDing it with max_unroll; we use the fact that
824 the number of unrollings is a power of two, and thus this is correct
825 even if there is overflow in the computation. */
826 niter
= expand_simple_binop (GET_MODE (desc
->var
), AND
,
828 GEN_INT (max_unroll
),
829 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
831 init_code
= get_insns ();
834 /* Precondition the loop. */
835 loop_split_edge_with (loop_preheader_edge (loop
), init_code
, loops
);
837 remove_edges
= xcalloc (max_unroll
+ n_peel
+ 1, sizeof (edge
));
840 wont_exit
= sbitmap_alloc (max_unroll
+ 2);
842 /* Peel the first copy of loop body (almost always we must leave exit test
843 here; the only exception is when we have extra zero check and the number
844 of iterations is reliable (i.e. comes out of NE condition)). Also record
845 the place of (possible) extra zero check. */
846 sbitmap_zero (wont_exit
);
847 if (extra_zero_check
&& desc
->cond
== NE
)
848 SET_BIT (wont_exit
, 1);
849 ezc_swtch
= loop_preheader_edge (loop
)->src
;
850 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
852 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
853 DLTHE_FLAG_UPDATE_FREQ
))
856 /* Record the place where switch will be built for preconditioning. */
857 swtch
= loop_split_edge_with (loop_preheader_edge (loop
),
860 for (i
= 0; i
< n_peel
; i
++)
863 sbitmap_zero (wont_exit
);
864 if (i
!= n_peel
- 1 || !last_may_exit
)
865 SET_BIT (wont_exit
, 1);
866 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
868 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
869 DLTHE_FLAG_UPDATE_FREQ
))
874 /* Create item for switch. */
875 j
= n_peel
- i
- (extra_zero_check
? 0 : 1);
876 p
= REG_BR_PROB_BASE
/ (i
+ 2);
878 preheader
= loop_split_edge_with (loop_preheader_edge (loop
),
880 label
= block_label (preheader
);
882 do_compare_rtx_and_jump (copy_rtx (niter
), GEN_INT (j
), EQ
, 0,
883 GET_MODE (desc
->var
), NULL_RTX
, NULL_RTX
,
885 jump
= get_last_insn ();
886 JUMP_LABEL (jump
) = label
;
888 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
889 GEN_INT (p
), REG_NOTES (jump
));
891 LABEL_NUSES (label
)++;
892 branch_code
= get_insns ();
895 swtch
= loop_split_edge_with (swtch
->pred
, branch_code
, loops
);
896 set_immediate_dominator (loops
->cfg
.dom
, preheader
, swtch
);
897 swtch
->succ
->probability
= REG_BR_PROB_BASE
- p
;
898 e
= make_edge (swtch
, preheader
, 0);
903 if (extra_zero_check
)
905 /* Add branch for zero iterations. */
906 p
= REG_BR_PROB_BASE
/ (max_unroll
+ 1);
908 preheader
= loop_split_edge_with (loop_preheader_edge (loop
),
910 label
= block_label (preheader
);
912 do_compare_rtx_and_jump (copy_rtx (niter
), const0_rtx
, EQ
, 0,
913 GET_MODE (desc
->var
), NULL_RTX
, NULL_RTX
,
915 jump
= get_last_insn ();
916 JUMP_LABEL (jump
) = label
;
918 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
919 GEN_INT (p
), REG_NOTES (jump
));
921 LABEL_NUSES (label
)++;
922 branch_code
= get_insns ();
925 swtch
= loop_split_edge_with (swtch
->succ
, branch_code
, loops
);
926 set_immediate_dominator (loops
->cfg
.dom
, preheader
, swtch
);
927 swtch
->succ
->probability
= REG_BR_PROB_BASE
- p
;
928 e
= make_edge (swtch
, preheader
, 0);
932 /* Recount dominators for outer blocks. */
933 iterate_fix_dominators (loops
->cfg
.dom
, dom_bbs
, n_dom_bbs
);
935 /* And unroll loop. */
937 sbitmap_ones (wont_exit
);
938 RESET_BIT (wont_exit
, may_exit_copy
);
940 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
942 wont_exit
, desc
->out_edge
, remove_edges
, &n_remove_edges
,
943 DLTHE_FLAG_UPDATE_FREQ
))
948 /* Remove the edges. */
949 for (i
= 0; i
< n_remove_edges
; i
++)
950 remove_path (loops
, remove_edges
[i
]);
954 fprintf (rtl_dump_file
,
955 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
956 max_unroll
, num_loop_insns (loop
));
959 /* Decide whether to simply peel LOOP and how much. */
961 decide_peel_simple (loops
, loop
, flags
)
968 if (!(flags
& UAP_PEEL
))
970 /* We were not asked to, just return back silently. */
975 fprintf (rtl_dump_file
, ";; Considering simply peeling loop\n");
977 /* npeel = number of iterations to peel. */
978 npeel
= PARAM_VALUE (PARAM_MAX_PEELED_INSNS
) / loop
->ninsns
;
979 if (npeel
> (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES
))
980 npeel
= PARAM_VALUE (PARAM_MAX_PEEL_TIMES
);
982 /* Skip big loops. */
986 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
990 /* Check for simple loops. */
992 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
994 /* Check number of iterations. */
995 if (loop
->simple
&& loop
->desc
.const_iter
)
998 fprintf (rtl_dump_file
, ";; Loop iterates constant times\n");
1002 /* Do not simply peel loops with branches inside -- it increases number
1004 if (loop
->desc
.n_branches
> 1)
1007 fprintf (rtl_dump_file
, ";; Not peeling, contains branches\n");
1011 if (loop
->header
->count
)
1013 unsigned niter
= expected_loop_iterations (loop
);
1014 if (niter
+ 1 > npeel
)
1018 fprintf (rtl_dump_file
, ";; Not peeling loop, rolls too much (");
1019 fprintf (rtl_dump_file
, HOST_WIDEST_INT_PRINT_DEC
, (HOST_WIDEST_INT
) (niter
+ 1));
1020 fprintf (rtl_dump_file
, " iterations > %d [maximum peelings])\n", npeel
);
1028 /* For now we have no good heuristics to decide whether loop peeling
1029 will be effective, so disable it. */
1031 fprintf (rtl_dump_file
,
1032 ";; Not peeling loop, no evidence it will be profitable\n");
1037 loop
->lpt_decision
.decision
= LPT_PEEL_SIMPLE
;
1038 loop
->lpt_decision
.times
= npeel
;
1041 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1047 if (!cond) goto end;
1049 if (!cond) goto end;
1056 peel_loop_simple (loops
, loop
)
1057 struct loops
*loops
;
1061 unsigned npeel
= loop
->lpt_decision
.times
;
1063 wont_exit
= sbitmap_alloc (npeel
+ 1);
1064 sbitmap_zero (wont_exit
);
1066 if (!duplicate_loop_to_header_edge (loop
, loop_preheader_edge (loop
),
1067 loops
, npeel
, wont_exit
, NULL
, NULL
, NULL
,
1068 DLTHE_FLAG_UPDATE_FREQ
))
1074 fprintf (rtl_dump_file
, ";; Peeling loop %d times\n", npeel
);
1077 /* Decide whether to unroll LOOP stupidly and how much. */
1079 decide_unroll_stupid (loops
, loop
, flags
)
1080 struct loops
*loops
;
1084 unsigned nunroll
, nunroll_by_av
, i
;
1086 if (!(flags
& UAP_UNROLL_ALL
))
1088 /* We were not asked to, just return back silently. */
1093 fprintf (rtl_dump_file
, ";; Considering unrolling loop stupidly\n");
1095 /* nunroll = total number of copies of the original loop body in
1096 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
1097 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS
) / loop
->ninsns
;
1098 nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS
) / loop
->av_ninsns
;
1099 if (nunroll
> nunroll_by_av
)
1100 nunroll
= nunroll_by_av
;
1101 if (nunroll
> (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
))
1102 nunroll
= PARAM_VALUE (PARAM_MAX_UNROLL_TIMES
);
1104 /* Skip big loops. */
1108 fprintf (rtl_dump_file
, ";; Not considering loop, is too big\n");
1112 /* Check for simple loops. */
1113 if (!loop
->has_desc
)
1114 loop
->simple
= simple_loop_p (loops
, loop
, &loop
->desc
);
1116 /* Check simpleness. */
1120 fprintf (rtl_dump_file
, ";; The loop is simple\n");
1124 /* Do not unroll loops with branches inside -- it increases number
1126 if (loop
->desc
.n_branches
> 1)
1129 fprintf (rtl_dump_file
, ";; Not unrolling, contains branches\n");
1133 /* If we have profile feedback, check whether the loop rolls. */
1134 if (loop
->header
->count
&& expected_loop_iterations (loop
) < 2 * nunroll
)
1137 fprintf (rtl_dump_file
, ";; Not unrolling loop, doesn't roll\n");
1141 /* Success. Now force nunroll to be power of 2, as it seems that this
1142 improves results (partially because of better alignments, partially
1143 because of some dark magic). */
1144 for (i
= 1; 2 * i
<= nunroll
; i
*= 2);
1146 loop
->lpt_decision
.decision
= LPT_UNROLL_STUPID
;
1147 loop
->lpt_decision
.times
= i
- 1;
1150 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1168 unroll_loop_stupid (loops
, loop
)
1169 struct loops
*loops
;
1173 unsigned nunroll
= loop
->lpt_decision
.times
;
1175 wont_exit
= sbitmap_alloc (nunroll
+ 1);
1176 sbitmap_zero (wont_exit
);
1178 if (!duplicate_loop_to_header_edge (loop
, loop_latch_edge (loop
),
1179 loops
, nunroll
, wont_exit
, NULL
, NULL
, NULL
,
1180 DLTHE_FLAG_UPDATE_FREQ
))
1186 fprintf (rtl_dump_file
, ";; Unrolled loop %d times, %i insns\n",
1187 nunroll
, num_loop_insns (loop
));