* config/arm/arm.h (REG_CLASS_NAMES): Add missing comma.
[official-gcc.git] / gcc / loop-unroll.c
blob6c796af577c836b31a8fa04416e800eed1aa9af5
1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "cfglayout.h"
30 #include "params.h"
31 #include "output.h"
32 #include "expr.h"
34 /* This pass performs loop unrolling and peeling. We only perform these
35 optimizations on innermost loops (with single exception) because
36 the impact on performance is greatest here, and we want to avoid
37 unnecessary code size growth. The gain is caused by greater sequentiality
38 of code, better code to optimize for further passes and in some cases
39 by fewer testings of exit conditions. The main problem is code growth,
40 that impacts performance negatively due to effect of caches.
42 What we do:
44 -- complete peeling of once-rolling loops; this is the above mentioned
45 exception, as this causes loop to be cancelled completely and
46 does not cause code growth
47 -- complete peeling of loops that roll (small) constant times.
48 -- simple peeling of first iterations of loops that do not roll much
49 (according to profile feedback)
50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
61 There is a lot of parameters (defined and described in params.def) that
62 control how much we unroll/peel.
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
69 static void decide_unrolling_and_peeling (struct loops *, int);
70 static void peel_loops_completely (struct loops *, int);
71 static void decide_peel_simple (struct loop *, int);
72 static void decide_peel_once_rolling (struct loop *, int);
73 static void decide_peel_completely (struct loop *, int);
74 static void decide_unroll_stupid (struct loop *, int);
75 static void decide_unroll_constant_iterations (struct loop *, int);
76 static void decide_unroll_runtime_iterations (struct loop *, int);
77 static void peel_loop_simple (struct loops *, struct loop *);
78 static void peel_loop_completely (struct loops *, struct loop *);
79 static void unroll_loop_stupid (struct loops *, struct loop *);
80 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
81 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
83 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
84 void
85 unroll_and_peel_loops (struct loops *loops, int flags)
87 struct loop *loop, *next;
88 int check;
90 /* First perform complete loop peeling (it is almost surely a win,
91 and affects parameters for further decision a lot). */
92 peel_loops_completely (loops, flags);
94 /* Now decide rest of unrolling and peeling. */
95 decide_unrolling_and_peeling (loops, flags);
97 loop = loops->tree_root;
98 while (loop->inner)
99 loop = loop->inner;
101 /* Scan the loops, inner ones first. */
102 while (loop != loops->tree_root)
104 if (loop->next)
106 next = loop->next;
107 while (next->inner)
108 next = next->inner;
110 else
111 next = loop->outer;
113 check = 1;
114 /* And perform the appropriate transformations. */
115 switch (loop->lpt_decision.decision)
117 case LPT_PEEL_COMPLETELY:
118 /* Already done. */
119 abort ();
120 case LPT_PEEL_SIMPLE:
121 peel_loop_simple (loops, loop);
122 break;
123 case LPT_UNROLL_CONSTANT:
124 unroll_loop_constant_iterations (loops, loop);
125 break;
126 case LPT_UNROLL_RUNTIME:
127 unroll_loop_runtime_iterations (loops, loop);
128 break;
129 case LPT_UNROLL_STUPID:
130 unroll_loop_stupid (loops, loop);
131 break;
132 case LPT_NONE:
133 check = 0;
134 break;
135 default:
136 abort ();
138 if (check)
140 #ifdef ENABLE_CHECKING
141 verify_dominators (CDI_DOMINATORS);
142 verify_loop_structure (loops);
143 #endif
145 loop = next;
149 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
150 static void
151 peel_loops_completely (struct loops *loops, int flags)
153 struct loop *loop, *next;
155 loop = loops->tree_root;
156 while (loop->inner)
157 loop = loop->inner;
159 while (loop != loops->tree_root)
161 if (loop->next)
163 next = loop->next;
164 while (next->inner)
165 next = next->inner;
167 else
168 next = loop->outer;
170 loop->lpt_decision.decision = LPT_NONE;
171 loop->has_desc = 0;
173 if (rtl_dump_file)
174 fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
175 loop->num);
177 loop->ninsns = num_loop_insns (loop);
179 decide_peel_once_rolling (loop, flags);
180 if (loop->lpt_decision.decision == LPT_NONE)
181 decide_peel_completely (loop, flags);
183 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
185 peel_loop_completely (loops, loop);
186 #ifdef ENABLE_CHECKING
187 verify_dominators (CDI_DOMINATORS);
188 verify_loop_structure (loops);
189 #endif
191 loop = next;
195 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
196 static void
197 decide_unrolling_and_peeling (struct loops *loops, int flags)
199 struct loop *loop = loops->tree_root, *next;
201 while (loop->inner)
202 loop = loop->inner;
204 /* Scan the loops, inner ones first. */
205 while (loop != loops->tree_root)
207 if (loop->next)
209 next = loop->next;
210 while (next->inner)
211 next = next->inner;
213 else
214 next = loop->outer;
216 loop->lpt_decision.decision = LPT_NONE;
218 if (rtl_dump_file)
219 fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
221 /* Do not peel cold areas. */
222 if (!maybe_hot_bb_p (loop->header))
224 if (rtl_dump_file)
225 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
226 loop = next;
227 continue;
230 /* Can the loop be manipulated? */
231 if (!can_duplicate_loop_p (loop))
233 if (rtl_dump_file)
234 fprintf (rtl_dump_file,
235 ";; Not considering loop, cannot duplicate\n");
236 loop = next;
237 continue;
240 /* Skip non-innermost loops. */
241 if (loop->inner)
243 if (rtl_dump_file)
244 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
245 loop = next;
246 continue;
249 loop->ninsns = num_loop_insns (loop);
250 loop->av_ninsns = average_num_loop_insns (loop);
252 /* Try transformations one by one in decreasing order of
253 priority. */
255 decide_unroll_constant_iterations (loop, flags);
256 if (loop->lpt_decision.decision == LPT_NONE)
257 decide_unroll_runtime_iterations (loop, flags);
258 if (loop->lpt_decision.decision == LPT_NONE)
259 decide_unroll_stupid (loop, flags);
260 if (loop->lpt_decision.decision == LPT_NONE)
261 decide_peel_simple (loop, flags);
263 loop = next;
267 /* Decide whether the LOOP is once rolling and suitable for complete
268 peeling. */
269 static void
270 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
272 if (rtl_dump_file)
273 fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
275 /* Is the loop small enough? */
276 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
278 if (rtl_dump_file)
279 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
280 return;
283 /* Check for simple loops. */
284 loop->simple = simple_loop_p (loop, &loop->desc);
285 loop->has_desc = 1;
287 /* Check number of iterations. */
288 if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
290 if (rtl_dump_file)
291 fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
292 return;
295 /* Success. */
296 if (rtl_dump_file)
297 fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n");
298 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
301 /* Decide whether the LOOP is suitable for complete peeling. */
302 static void
303 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
305 unsigned npeel;
307 if (rtl_dump_file)
308 fprintf (rtl_dump_file, ";; Considering peeling completely\n");
310 /* Skip non-innermost loops. */
311 if (loop->inner)
313 if (rtl_dump_file)
314 fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
315 return;
318 /* Do not peel cold areas. */
319 if (!maybe_hot_bb_p (loop->header))
321 if (rtl_dump_file)
322 fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
323 return;
326 /* Can the loop be manipulated? */
327 if (!can_duplicate_loop_p (loop))
329 if (rtl_dump_file)
330 fprintf (rtl_dump_file,
331 ";; Not considering loop, cannot duplicate\n");
332 return;
335 /* npeel = number of iterations to peel. */
336 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
337 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
338 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
340 /* Is the loop small enough? */
341 if (!npeel)
343 if (rtl_dump_file)
344 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
345 return;
348 /* Check for simple loops. */
349 if (!loop->has_desc)
351 loop->simple = simple_loop_p (loop, &loop->desc);
352 loop->has_desc = 1;
355 /* Check number of iterations. */
356 if (!loop->simple || !loop->desc.const_iter)
358 if (rtl_dump_file)
359 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
360 return;
363 if (loop->desc.niter > npeel - 1)
365 if (rtl_dump_file)
367 fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
368 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
369 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
371 return;
374 /* Success. */
375 if (rtl_dump_file)
376 fprintf (rtl_dump_file, ";; Decided to peel loop completely\n");
377 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
380 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
381 completely. The transformation done:
383 for (i = 0; i < 4; i++)
384 body;
388 i = 0;
389 body; i++;
390 body; i++;
391 body; i++;
392 body; i++;
394 static void
395 peel_loop_completely (struct loops *loops, struct loop *loop)
397 sbitmap wont_exit;
398 unsigned HOST_WIDE_INT npeel;
399 unsigned n_remove_edges, i;
400 edge *remove_edges;
401 struct loop_desc *desc = &loop->desc;
403 npeel = desc->niter;
405 if (npeel)
407 wont_exit = sbitmap_alloc (npeel + 1);
408 sbitmap_ones (wont_exit);
409 RESET_BIT (wont_exit, 0);
410 if (desc->may_be_zero)
411 RESET_BIT (wont_exit, 1);
413 remove_edges = xcalloc (npeel, sizeof (edge));
414 n_remove_edges = 0;
416 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
417 loops, npeel,
418 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
419 DLTHE_FLAG_UPDATE_FREQ))
420 abort ();
422 free (wont_exit);
424 /* Remove the exit edges. */
425 for (i = 0; i < n_remove_edges; i++)
426 remove_path (loops, remove_edges[i]);
427 free (remove_edges);
430 /* Now remove the unreachable part of the last iteration and cancel
431 the loop. */
432 remove_path (loops, desc->in_edge);
434 if (rtl_dump_file)
435 fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
438 /* Decide whether to unroll LOOP iterating constant number of times and how much. */
439 static void
440 decide_unroll_constant_iterations (struct loop *loop, int flags)
442 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
444 if (!(flags & UAP_UNROLL))
446 /* We were not asked to, just return back silently. */
447 return;
450 if (rtl_dump_file)
451 fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
453 /* nunroll = total number of copies of the original loop body in
454 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
455 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
456 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
457 if (nunroll > nunroll_by_av)
458 nunroll = nunroll_by_av;
459 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
460 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
462 /* Skip big loops. */
463 if (nunroll <= 1)
465 if (rtl_dump_file)
466 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
467 return;
470 /* Check for simple loops. */
471 if (!loop->has_desc)
473 loop->simple = simple_loop_p (loop, &loop->desc);
474 loop->has_desc = 1;
477 /* Check number of iterations. */
478 if (!loop->simple || !loop->desc.const_iter)
480 if (rtl_dump_file)
481 fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
482 return;
485 /* Check whether the loop rolls enough to consider. */
486 if (loop->desc.niter < 2 * nunroll)
488 if (rtl_dump_file)
489 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
490 return;
493 /* Success; now compute number of iterations to unroll. We alter
494 nunroll so that as few as possible copies of loop body are
495 necessary, while still not decreasing the number of unrollings
496 too much (at most by 1). */
497 best_copies = 2 * nunroll + 10;
499 i = 2 * nunroll + 2;
500 if ((unsigned) i - 1 >= loop->desc.niter)
501 i = loop->desc.niter - 2;
503 for (; i >= nunroll - 1; i--)
505 unsigned exit_mod = loop->desc.niter % (i + 1);
507 if (loop->desc.postincr)
508 n_copies = exit_mod + i + 1;
509 else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
510 n_copies = exit_mod + i + 2;
511 else
512 n_copies = i + 1;
514 if (n_copies < best_copies)
516 best_copies = n_copies;
517 best_unroll = i;
521 if (rtl_dump_file)
522 fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
523 best_unroll + 1, best_copies, nunroll);
525 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
526 loop->lpt_decision.times = best_unroll;
529 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
530 times. The transformation does this:
532 for (i = 0; i < 102; i++)
533 body;
537 i = 0;
538 body; i++;
539 body; i++;
540 while (i < 102)
542 body; i++;
543 body; i++;
544 body; i++;
545 body; i++;
548 static void
549 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
551 unsigned HOST_WIDE_INT niter;
552 unsigned exit_mod;
553 sbitmap wont_exit;
554 unsigned n_remove_edges, i;
555 edge *remove_edges;
556 unsigned max_unroll = loop->lpt_decision.times;
557 struct loop_desc *desc = &loop->desc;
559 niter = desc->niter;
561 if (niter <= (unsigned) max_unroll + 1)
562 abort (); /* Should not get here (such loop should be peeled instead). */
564 exit_mod = niter % (max_unroll + 1);
566 wont_exit = sbitmap_alloc (max_unroll + 1);
567 sbitmap_ones (wont_exit);
569 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
570 n_remove_edges = 0;
572 if (desc->postincr)
574 /* Counter is incremented after the exit test; leave exit test
575 in the first copy, so that the loops that start with test
576 of exit condition have continuous body after unrolling. */
578 if (rtl_dump_file)
579 fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n");
581 /* Peel exit_mod iterations. */
582 RESET_BIT (wont_exit, 0);
583 if (desc->may_be_zero)
584 RESET_BIT (wont_exit, 1);
586 if (exit_mod
587 && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
588 loops, exit_mod,
589 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
590 DLTHE_FLAG_UPDATE_FREQ))
591 abort ();
593 SET_BIT (wont_exit, 1);
595 else
597 /* Leave exit test in last copy, for the same reason as above if
598 the loop tests the condition at the end of loop body. */
600 if (rtl_dump_file)
601 fprintf (rtl_dump_file, ";; Condition on end of loop.\n");
603 /* We know that niter >= max_unroll + 2; so we do not need to care of
604 case when we would exit before reaching the loop. So just peel
605 exit_mod + 1 iterations.
607 if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
609 RESET_BIT (wont_exit, 0);
610 if (desc->may_be_zero)
611 RESET_BIT (wont_exit, 1);
613 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
614 loops, exit_mod + 1,
615 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
616 DLTHE_FLAG_UPDATE_FREQ))
617 abort ();
619 SET_BIT (wont_exit, 0);
620 SET_BIT (wont_exit, 1);
623 RESET_BIT (wont_exit, max_unroll);
626 /* Now unroll the loop. */
627 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
628 loops, max_unroll,
629 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
630 DLTHE_FLAG_UPDATE_FREQ))
631 abort ();
633 free (wont_exit);
635 /* Remove the edges. */
636 for (i = 0; i < n_remove_edges; i++)
637 remove_path (loops, remove_edges[i]);
638 free (remove_edges);
640 if (rtl_dump_file)
641 fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop));
644 /* Decide whether to unroll LOOP iterating runtime computable number of times
645 and how much. */
646 static void
647 decide_unroll_runtime_iterations (struct loop *loop, int flags)
649 unsigned nunroll, nunroll_by_av, i;
651 if (!(flags & UAP_UNROLL))
653 /* We were not asked to, just return back silently. */
654 return;
657 if (rtl_dump_file)
658 fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
660 /* nunroll = total number of copies of the original loop body in
661 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
662 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
663 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
664 if (nunroll > nunroll_by_av)
665 nunroll = nunroll_by_av;
666 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
667 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
669 /* Skip big loops. */
670 if (nunroll <= 1)
672 if (rtl_dump_file)
673 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
674 return;
677 /* Check for simple loops. */
678 if (!loop->has_desc)
680 loop->simple = simple_loop_p (loop, &loop->desc);
681 loop->has_desc = 1;
684 /* Check simpleness. */
685 if (!loop->simple)
687 if (rtl_dump_file)
688 fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
689 return;
692 if (loop->desc.const_iter)
694 if (rtl_dump_file)
695 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
696 return;
699 /* If we have profile feedback, check whether the loop rolls. */
700 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
702 if (rtl_dump_file)
703 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
704 return;
707 /* Success; now force nunroll to be power of 2, as we are unable to
708 cope with overflows in computation of number of iterations. */
709 for (i = 1; 2 * i <= nunroll; i *= 2);
711 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
712 loop->lpt_decision.times = i - 1;
715 /* Unroll LOOP for that we are able to count number of iterations in runtime
716 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
717 extra care for case n < 0):
719 for (i = 0; i < n; i++)
720 body;
724 i = 0;
725 mod = n % 4;
727 switch (mod)
729 case 3:
730 body; i++;
731 case 2:
732 body; i++;
733 case 1:
734 body; i++;
735 case 0: ;
738 while (i < n)
740 body; i++;
741 body; i++;
742 body; i++;
743 body; i++;
746 static void
747 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
749 rtx niter, init_code, branch_code, jump, label;
750 unsigned i, j, p;
751 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
752 unsigned n_dom_bbs;
753 sbitmap wont_exit;
754 int may_exit_copy;
755 unsigned n_peel, n_remove_edges;
756 edge *remove_edges, e;
757 bool extra_zero_check, last_may_exit;
758 unsigned max_unroll = loop->lpt_decision.times;
759 struct loop_desc *desc = &loop->desc;
761 /* Remember blocks whose dominators will have to be updated. */
762 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
763 n_dom_bbs = 0;
765 body = get_loop_body (loop);
766 for (i = 0; i < loop->num_nodes; i++)
768 unsigned nldom;
769 basic_block *ldom;
771 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
772 for (j = 0; j < nldom; j++)
773 if (!flow_bb_inside_loop_p (loop, ldom[j]))
774 dom_bbs[n_dom_bbs++] = ldom[j];
776 free (ldom);
778 free (body);
780 if (desc->postincr)
782 /* Leave exit in first copy (for explanation why see comment in
783 unroll_loop_constant_iterations). */
784 may_exit_copy = 0;
785 n_peel = max_unroll - 1;
786 extra_zero_check = true;
787 last_may_exit = false;
789 else
791 /* Leave exit in last copy (for explanation why see comment in
792 unroll_loop_constant_iterations). */
793 may_exit_copy = max_unroll;
794 n_peel = max_unroll;
795 extra_zero_check = false;
796 last_may_exit = true;
799 /* Get expression for number of iterations. */
800 start_sequence ();
801 niter = count_loop_iterations (desc, NULL, NULL);
802 if (!niter)
803 abort ();
804 niter = force_operand (niter, NULL);
806 /* Count modulo by ANDing it with max_unroll; we use the fact that
807 the number of unrollings is a power of two, and thus this is correct
808 even if there is overflow in the computation. */
809 niter = expand_simple_binop (GET_MODE (desc->var), AND,
810 niter,
811 GEN_INT (max_unroll),
812 NULL_RTX, 0, OPTAB_LIB_WIDEN);
814 init_code = get_insns ();
815 end_sequence ();
817 /* Precondition the loop. */
818 loop_split_edge_with (loop_preheader_edge (loop), init_code);
820 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
821 n_remove_edges = 0;
823 wont_exit = sbitmap_alloc (max_unroll + 2);
825 /* Peel the first copy of loop body (almost always we must leave exit test
826 here; the only exception is when we have extra zero check and the number
827 of iterations is reliable (i.e. comes out of NE condition). Also record
828 the place of (possible) extra zero check. */
829 sbitmap_zero (wont_exit);
830 if (extra_zero_check && desc->cond == NE)
831 SET_BIT (wont_exit, 1);
832 ezc_swtch = loop_preheader_edge (loop)->src;
833 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
834 loops, 1,
835 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
836 DLTHE_FLAG_UPDATE_FREQ))
837 abort ();
839 /* Record the place where switch will be built for preconditioning. */
840 swtch = loop_split_edge_with (loop_preheader_edge (loop),
841 NULL_RTX);
843 for (i = 0; i < n_peel; i++)
845 /* Peel the copy. */
846 sbitmap_zero (wont_exit);
847 if (i != n_peel - 1 || !last_may_exit)
848 SET_BIT (wont_exit, 1);
849 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
850 loops, 1,
851 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
852 DLTHE_FLAG_UPDATE_FREQ))
853 abort ();
855 /* Create item for switch. */
856 j = n_peel - i - (extra_zero_check ? 0 : 1);
857 p = REG_BR_PROB_BASE / (i + 2);
859 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
860 label = block_label (preheader);
861 start_sequence ();
862 do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
863 GET_MODE (desc->var), NULL_RTX, NULL_RTX,
864 label);
865 jump = get_last_insn ();
866 JUMP_LABEL (jump) = label;
867 REG_NOTES (jump)
868 = gen_rtx_EXPR_LIST (REG_BR_PROB,
869 GEN_INT (p), REG_NOTES (jump));
871 LABEL_NUSES (label)++;
872 branch_code = get_insns ();
873 end_sequence ();
875 swtch = loop_split_edge_with (swtch->pred, branch_code);
876 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
877 swtch->succ->probability = REG_BR_PROB_BASE - p;
878 e = make_edge (swtch, preheader,
879 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
880 e->probability = p;
883 if (extra_zero_check)
885 /* Add branch for zero iterations. */
886 p = REG_BR_PROB_BASE / (max_unroll + 1);
887 swtch = ezc_swtch;
888 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
889 label = block_label (preheader);
890 start_sequence ();
891 do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
892 GET_MODE (desc->var), NULL_RTX, NULL_RTX,
893 label);
894 jump = get_last_insn ();
895 JUMP_LABEL (jump) = label;
896 REG_NOTES (jump)
897 = gen_rtx_EXPR_LIST (REG_BR_PROB,
898 GEN_INT (p), REG_NOTES (jump));
900 LABEL_NUSES (label)++;
901 branch_code = get_insns ();
902 end_sequence ();
904 swtch = loop_split_edge_with (swtch->succ, branch_code);
905 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
906 swtch->succ->probability = REG_BR_PROB_BASE - p;
907 e = make_edge (swtch, preheader,
908 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
909 e->probability = p;
912 /* Recount dominators for outer blocks. */
913 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
915 /* And unroll loop. */
917 sbitmap_ones (wont_exit);
918 RESET_BIT (wont_exit, may_exit_copy);
920 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
921 loops, max_unroll,
922 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
923 DLTHE_FLAG_UPDATE_FREQ))
924 abort ();
926 free (wont_exit);
928 /* Remove the edges. */
929 for (i = 0; i < n_remove_edges; i++)
930 remove_path (loops, remove_edges[i]);
931 free (remove_edges);
933 if (rtl_dump_file)
934 fprintf (rtl_dump_file,
935 ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
936 max_unroll, num_loop_insns (loop));
939 /* Decide whether to simply peel LOOP and how much. */
940 static void
941 decide_peel_simple (struct loop *loop, int flags)
943 unsigned npeel;
945 if (!(flags & UAP_PEEL))
947 /* We were not asked to, just return back silently. */
948 return;
951 if (rtl_dump_file)
952 fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
954 /* npeel = number of iterations to peel. */
955 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
956 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
957 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
959 /* Skip big loops. */
960 if (!npeel)
962 if (rtl_dump_file)
963 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
964 return;
967 /* Check for simple loops. */
968 if (!loop->has_desc)
970 loop->simple = simple_loop_p (loop, &loop->desc);
971 loop->has_desc = 1;
974 /* Check number of iterations. */
975 if (loop->simple && loop->desc.const_iter)
977 if (rtl_dump_file)
978 fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
979 return;
982 /* Do not simply peel loops with branches inside -- it increases number
983 of mispredicts. */
984 if (loop->desc.n_branches > 1)
986 if (rtl_dump_file)
987 fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
988 return;
991 if (loop->header->count)
993 unsigned niter = expected_loop_iterations (loop);
994 if (niter + 1 > npeel)
996 if (rtl_dump_file)
998 fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much (");
999 fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1));
1000 fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
1002 return;
1004 npeel = niter + 1;
1006 else
1008 /* For now we have no good heuristics to decide whether loop peeling
1009 will be effective, so disable it. */
1010 if (rtl_dump_file)
1011 fprintf (rtl_dump_file,
1012 ";; Not peeling loop, no evidence it will be profitable\n");
1013 return;
1016 /* Success. */
1017 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1018 loop->lpt_decision.times = npeel;
1021 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1022 while (cond)
1023 body;
1027 if (!cond) goto end;
1028 body;
1029 if (!cond) goto end;
1030 body;
1031 while (cond)
1032 body;
1033 end: ;
1035 static void
1036 peel_loop_simple (struct loops *loops, struct loop *loop)
1038 sbitmap wont_exit;
1039 unsigned npeel = loop->lpt_decision.times;
1041 wont_exit = sbitmap_alloc (npeel + 1);
1042 sbitmap_zero (wont_exit);
1044 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1045 loops, npeel, wont_exit, NULL, NULL, NULL,
1046 DLTHE_FLAG_UPDATE_FREQ))
1047 abort ();
1049 free (wont_exit);
1051 if (rtl_dump_file)
1052 fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
1055 /* Decide whether to unroll LOOP stupidly and how much. */
1056 static void
1057 decide_unroll_stupid (struct loop *loop, int flags)
1059 unsigned nunroll, nunroll_by_av, i;
1061 if (!(flags & UAP_UNROLL_ALL))
1063 /* We were not asked to, just return back silently. */
1064 return;
1067 if (rtl_dump_file)
1068 fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
1070 /* nunroll = total number of copies of the original loop body in
1071 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1072 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1073 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1074 if (nunroll > nunroll_by_av)
1075 nunroll = nunroll_by_av;
1076 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1077 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1079 /* Skip big loops. */
1080 if (nunroll <= 1)
1082 if (rtl_dump_file)
1083 fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1084 return;
1087 /* Check for simple loops. */
1088 if (!loop->has_desc)
1090 loop->simple = simple_loop_p (loop, &loop->desc);
1091 loop->has_desc = 1;
1094 /* Check simpleness. */
1095 if (loop->simple)
1097 if (rtl_dump_file)
1098 fprintf (rtl_dump_file, ";; The loop is simple\n");
1099 return;
1102 /* Do not unroll loops with branches inside -- it increases number
1103 of mispredicts. */
1104 if (loop->desc.n_branches > 1)
1106 if (rtl_dump_file)
1107 fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
1108 return;
1111 /* If we have profile feedback, check whether the loop rolls. */
1112 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
1114 if (rtl_dump_file)
1115 fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
1116 return;
1119 /* Success. Now force nunroll to be power of 2, as it seems that this
1120 improves results (partially because of better alignments, partially
1121 because of some dark magic). */
1122 for (i = 1; 2 * i <= nunroll; i *= 2);
1124 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1125 loop->lpt_decision.times = i - 1;
1128 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1129 while (cond)
1130 body;
1134 while (cond)
1136 body;
1137 if (!cond) break;
1138 body;
1139 if (!cond) break;
1140 body;
1141 if (!cond) break;
1142 body;
1145 static void
1146 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1148 sbitmap wont_exit;
1149 unsigned nunroll = loop->lpt_decision.times;
1151 wont_exit = sbitmap_alloc (nunroll + 1);
1152 sbitmap_zero (wont_exit);
1154 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1155 loops, nunroll, wont_exit, NULL, NULL, NULL,
1156 DLTHE_FLAG_UPDATE_FREQ))
1157 abort ();
1159 free (wont_exit);
1161 if (rtl_dump_file)
1162 fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
1163 nunroll, num_loop_insns (loop));