* config/stormy16/stormy16.c (xstormy16_asm_output_aligned_common):
[official-gcc.git] / gcc / loop-unroll.c
blobf0e95dc76222a0298244931bc2b5cdebd6879509
1 /* Loop unrolling and peeling.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "cfglayout.h"
30 #include "params.h"
31 #include "output.h"
32 #include "expr.h"
33 #include "hashtab.h"
34 #include "recog.h"
36 /* This pass performs loop unrolling and peeling. We only perform these
37 optimizations on innermost loops (with single exception) because
38 the impact on performance is greatest here, and we want to avoid
39 unnecessary code size growth. The gain is caused by greater sequentiality
40 of code, better code to optimize for further passes and in some cases
41 by fewer testings of exit conditions. The main problem is code growth,
42 that impacts performance negatively due to effect of caches.
44 What we do:
46 -- complete peeling of once-rolling loops; this is the above mentioned
47 exception, as this causes loop to be cancelled completely and
48 does not cause code growth
49 -- complete peeling of loops that roll (small) constant times.
50 -- simple peeling of first iterations of loops that do not roll much
51 (according to profile feedback)
52 -- unrolling of loops that roll constant times; this is almost always
53 win, as we get rid of exit condition tests.
54 -- unrolling of loops that roll number of times that we can compute
55 in runtime; we also get rid of exit condition tests here, but there
56 is the extra expense for calculating the number of iterations
57 -- simple unrolling of remaining loops; this is performed only if we
58 are asked to, as the gain is questionable in this case and often
59 it may even slow down the code
60 For more detailed descriptions of each of those, see comments at
61 appropriate function below.
63 There is a lot of parameters (defined and described in params.def) that
64 control how much we unroll/peel.
66 ??? A great problem is that we don't have a good way how to determine
67 how many times we should unroll the loop; the experiments I have made
68 showed that this choice may affect performance in order of several %.
71 /* Information about induction variables to split. */
73 struct iv_to_split
75 rtx insn; /* The insn in that the induction variable occurs. */
76 rtx base_var; /* The variable on that the values in the further
77 iterations are based. */
78 rtx step; /* Step of the induction variable. */
79 unsigned n_loc;
80 unsigned loc[3]; /* Location where the definition of the induction
81 variable occurs in the insn. For example if
82 N_LOC is 2, the expression is located at
83 XEXP (XEXP (single_set, loc[0]), loc[1]). */
86 struct split_ivs_info
88 htab_t insns_to_split; /* A hashtable of insns to split. */
89 unsigned first_new_block; /* The first basic block that was
90 duplicated. */
93 static void decide_unrolling_and_peeling (struct loops *, int);
94 static void peel_loops_completely (struct loops *, int);
95 static void decide_peel_simple (struct loop *, int);
96 static void decide_peel_once_rolling (struct loop *, int);
97 static void decide_peel_completely (struct loop *, int);
98 static void decide_unroll_stupid (struct loop *, int);
99 static void decide_unroll_constant_iterations (struct loop *, int);
100 static void decide_unroll_runtime_iterations (struct loop *, int);
101 static void peel_loop_simple (struct loops *, struct loop *);
102 static void peel_loop_completely (struct loops *, struct loop *);
103 static void unroll_loop_stupid (struct loops *, struct loop *);
104 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
105 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
106 static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
107 static void si_info_start_duplication (struct split_ivs_info *);
108 static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
109 static void free_si_info (struct split_ivs_info *);
111 /* Unroll and/or peel (depending on FLAGS) LOOPS. */
112 void
113 unroll_and_peel_loops (struct loops *loops, int flags)
115 struct loop *loop, *next;
116 bool check;
118 /* First perform complete loop peeling (it is almost surely a win,
119 and affects parameters for further decision a lot). */
120 peel_loops_completely (loops, flags);
122 /* Now decide rest of unrolling and peeling. */
123 decide_unrolling_and_peeling (loops, flags);
125 loop = loops->tree_root;
126 while (loop->inner)
127 loop = loop->inner;
129 /* Scan the loops, inner ones first. */
130 while (loop != loops->tree_root)
132 if (loop->next)
134 next = loop->next;
135 while (next->inner)
136 next = next->inner;
138 else
139 next = loop->outer;
141 check = true;
142 /* And perform the appropriate transformations. */
143 switch (loop->lpt_decision.decision)
145 case LPT_PEEL_COMPLETELY:
146 /* Already done. */
147 gcc_unreachable ();
148 case LPT_PEEL_SIMPLE:
149 peel_loop_simple (loops, loop);
150 break;
151 case LPT_UNROLL_CONSTANT:
152 unroll_loop_constant_iterations (loops, loop);
153 break;
154 case LPT_UNROLL_RUNTIME:
155 unroll_loop_runtime_iterations (loops, loop);
156 break;
157 case LPT_UNROLL_STUPID:
158 unroll_loop_stupid (loops, loop);
159 break;
160 case LPT_NONE:
161 check = false;
162 break;
163 default:
164 gcc_unreachable ();
166 if (check)
168 #ifdef ENABLE_CHECKING
169 verify_dominators (CDI_DOMINATORS);
170 verify_loop_structure (loops);
171 #endif
173 loop = next;
176 iv_analysis_done ();
179 /* Check whether exit of the LOOP is at the end of loop body. */
181 static bool
182 loop_exit_at_end_p (struct loop *loop)
184 struct niter_desc *desc = get_simple_loop_desc (loop);
185 rtx insn;
187 if (desc->in_edge->dest != loop->latch)
188 return false;
190 /* Check that the latch is empty. */
191 FOR_BB_INSNS (loop->latch, insn)
193 if (INSN_P (insn))
194 return false;
197 return true;
200 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
201 static void
202 peel_loops_completely (struct loops *loops, int flags)
204 struct loop *loop, *next;
206 loop = loops->tree_root;
207 while (loop->inner)
208 loop = loop->inner;
210 while (loop != loops->tree_root)
212 if (loop->next)
214 next = loop->next;
215 while (next->inner)
216 next = next->inner;
218 else
219 next = loop->outer;
221 loop->lpt_decision.decision = LPT_NONE;
223 if (dump_file)
224 fprintf (dump_file,
225 "\n;; *** Considering loop %d for complete peeling ***\n",
226 loop->num);
228 loop->ninsns = num_loop_insns (loop);
230 decide_peel_once_rolling (loop, flags);
231 if (loop->lpt_decision.decision == LPT_NONE)
232 decide_peel_completely (loop, flags);
234 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
236 peel_loop_completely (loops, loop);
237 #ifdef ENABLE_CHECKING
238 verify_dominators (CDI_DOMINATORS);
239 verify_loop_structure (loops);
240 #endif
242 loop = next;
246 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */
247 static void
248 decide_unrolling_and_peeling (struct loops *loops, int flags)
250 struct loop *loop = loops->tree_root, *next;
252 while (loop->inner)
253 loop = loop->inner;
255 /* Scan the loops, inner ones first. */
256 while (loop != loops->tree_root)
258 if (loop->next)
260 next = loop->next;
261 while (next->inner)
262 next = next->inner;
264 else
265 next = loop->outer;
267 loop->lpt_decision.decision = LPT_NONE;
269 if (dump_file)
270 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
272 /* Do not peel cold areas. */
273 if (!maybe_hot_bb_p (loop->header))
275 if (dump_file)
276 fprintf (dump_file, ";; Not considering loop, cold area\n");
277 loop = next;
278 continue;
281 /* Can the loop be manipulated? */
282 if (!can_duplicate_loop_p (loop))
284 if (dump_file)
285 fprintf (dump_file,
286 ";; Not considering loop, cannot duplicate\n");
287 loop = next;
288 continue;
291 /* Skip non-innermost loops. */
292 if (loop->inner)
294 if (dump_file)
295 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
296 loop = next;
297 continue;
300 loop->ninsns = num_loop_insns (loop);
301 loop->av_ninsns = average_num_loop_insns (loop);
303 /* Try transformations one by one in decreasing order of
304 priority. */
306 decide_unroll_constant_iterations (loop, flags);
307 if (loop->lpt_decision.decision == LPT_NONE)
308 decide_unroll_runtime_iterations (loop, flags);
309 if (loop->lpt_decision.decision == LPT_NONE)
310 decide_unroll_stupid (loop, flags);
311 if (loop->lpt_decision.decision == LPT_NONE)
312 decide_peel_simple (loop, flags);
314 loop = next;
318 /* Decide whether the LOOP is once rolling and suitable for complete
319 peeling. */
320 static void
321 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
323 struct niter_desc *desc;
325 if (dump_file)
326 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
328 /* Is the loop small enough? */
329 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
331 if (dump_file)
332 fprintf (dump_file, ";; Not considering loop, is too big\n");
333 return;
336 /* Check for simple loops. */
337 desc = get_simple_loop_desc (loop);
339 /* Check number of iterations. */
340 if (!desc->simple_p
341 || desc->assumptions
342 || !desc->const_iter
343 || desc->niter != 0)
345 if (dump_file)
346 fprintf (dump_file,
347 ";; Unable to prove that the loop rolls exactly once\n");
348 return;
351 /* Success. */
352 if (dump_file)
353 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
354 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
357 /* Decide whether the LOOP is suitable for complete peeling. */
358 static void
359 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
361 unsigned npeel;
362 struct niter_desc *desc;
364 if (dump_file)
365 fprintf (dump_file, "\n;; Considering peeling completely\n");
367 /* Skip non-innermost loops. */
368 if (loop->inner)
370 if (dump_file)
371 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
372 return;
375 /* Do not peel cold areas. */
376 if (!maybe_hot_bb_p (loop->header))
378 if (dump_file)
379 fprintf (dump_file, ";; Not considering loop, cold area\n");
380 return;
383 /* Can the loop be manipulated? */
384 if (!can_duplicate_loop_p (loop))
386 if (dump_file)
387 fprintf (dump_file,
388 ";; Not considering loop, cannot duplicate\n");
389 return;
392 /* npeel = number of iterations to peel. */
393 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
394 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
395 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
397 /* Is the loop small enough? */
398 if (!npeel)
400 if (dump_file)
401 fprintf (dump_file, ";; Not considering loop, is too big\n");
402 return;
405 /* Check for simple loops. */
406 desc = get_simple_loop_desc (loop);
408 /* Check number of iterations. */
409 if (!desc->simple_p
410 || desc->assumptions
411 || !desc->const_iter)
413 if (dump_file)
414 fprintf (dump_file,
415 ";; Unable to prove that the loop iterates constant times\n");
416 return;
419 if (desc->niter > npeel - 1)
421 if (dump_file)
423 fprintf (dump_file,
424 ";; Not peeling loop completely, rolls too much (");
425 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
426 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
428 return;
431 /* Success. */
432 if (dump_file)
433 fprintf (dump_file, ";; Decided to peel loop completely\n");
434 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
437 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
438 completely. The transformation done:
440 for (i = 0; i < 4; i++)
441 body;
445 i = 0;
446 body; i++;
447 body; i++;
448 body; i++;
449 body; i++;
451 static void
452 peel_loop_completely (struct loops *loops, struct loop *loop)
454 sbitmap wont_exit;
455 unsigned HOST_WIDE_INT npeel;
456 unsigned n_remove_edges, i;
457 edge *remove_edges, ei;
458 struct niter_desc *desc = get_simple_loop_desc (loop);
459 struct split_ivs_info *si_info = NULL;
461 npeel = desc->niter;
463 if (npeel)
465 wont_exit = sbitmap_alloc (npeel + 1);
466 sbitmap_ones (wont_exit);
467 RESET_BIT (wont_exit, 0);
468 if (desc->noloop_assumptions)
469 RESET_BIT (wont_exit, 1);
471 remove_edges = xcalloc (npeel, sizeof (edge));
472 n_remove_edges = 0;
474 if (flag_split_ivs_in_unroller)
475 si_info = analyze_ivs_to_split (loop);
477 si_info_start_duplication (si_info);
478 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
479 loops, npeel,
480 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
481 DLTHE_FLAG_UPDATE_FREQ))
482 abort ();
484 free (wont_exit);
486 if (si_info)
488 split_ivs_in_copies (si_info, npeel, false, true);
489 free_si_info (si_info);
492 /* Remove the exit edges. */
493 for (i = 0; i < n_remove_edges; i++)
494 remove_path (loops, remove_edges[i]);
495 free (remove_edges);
498 ei = desc->in_edge;
499 free_simple_loop_desc (loop);
501 /* Now remove the unreachable part of the last iteration and cancel
502 the loop. */
503 remove_path (loops, ei);
505 if (dump_file)
506 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
509 /* Decide whether to unroll LOOP iterating constant number of times
510 and how much. */
512 static void
513 decide_unroll_constant_iterations (struct loop *loop, int flags)
515 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
516 struct niter_desc *desc;
518 if (!(flags & UAP_UNROLL))
520 /* We were not asked to, just return back silently. */
521 return;
524 if (dump_file)
525 fprintf (dump_file,
526 "\n;; Considering unrolling loop with constant "
527 "number of iterations\n");
529 /* nunroll = total number of copies of the original loop body in
530 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
531 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
532 nunroll_by_av
533 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
534 if (nunroll > nunroll_by_av)
535 nunroll = nunroll_by_av;
536 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
537 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
539 /* Skip big loops. */
540 if (nunroll <= 1)
542 if (dump_file)
543 fprintf (dump_file, ";; Not considering loop, is too big\n");
544 return;
547 /* Check for simple loops. */
548 desc = get_simple_loop_desc (loop);
550 /* Check number of iterations. */
551 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
553 if (dump_file)
554 fprintf (dump_file,
555 ";; Unable to prove that the loop iterates constant times\n");
556 return;
559 /* Check whether the loop rolls enough to consider. */
560 if (desc->niter < 2 * nunroll)
562 if (dump_file)
563 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
564 return;
567 /* Success; now compute number of iterations to unroll. We alter
568 nunroll so that as few as possible copies of loop body are
569 necessary, while still not decreasing the number of unrollings
570 too much (at most by 1). */
571 best_copies = 2 * nunroll + 10;
573 i = 2 * nunroll + 2;
574 if (i - 1 >= desc->niter)
575 i = desc->niter - 2;
577 for (; i >= nunroll - 1; i--)
579 unsigned exit_mod = desc->niter % (i + 1);
581 if (!loop_exit_at_end_p (loop))
582 n_copies = exit_mod + i + 1;
583 else if (exit_mod != (unsigned) i
584 || desc->noloop_assumptions != NULL_RTX)
585 n_copies = exit_mod + i + 2;
586 else
587 n_copies = i + 1;
589 if (n_copies < best_copies)
591 best_copies = n_copies;
592 best_unroll = i;
596 if (dump_file)
597 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
598 best_unroll + 1, best_copies, nunroll);
600 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
601 loop->lpt_decision.times = best_unroll;
603 if (dump_file)
604 fprintf (dump_file,
605 ";; Decided to unroll the constant times rolling loop, %d times.\n",
606 loop->lpt_decision.times);
609 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
610 times. The transformation does this:
612 for (i = 0; i < 102; i++)
613 body;
617 i = 0;
618 body; i++;
619 body; i++;
620 while (i < 102)
622 body; i++;
623 body; i++;
624 body; i++;
625 body; i++;
628 static void
629 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
631 unsigned HOST_WIDE_INT niter;
632 unsigned exit_mod;
633 sbitmap wont_exit;
634 unsigned n_remove_edges, i;
635 edge *remove_edges;
636 unsigned max_unroll = loop->lpt_decision.times;
637 struct niter_desc *desc = get_simple_loop_desc (loop);
638 bool exit_at_end = loop_exit_at_end_p (loop);
639 struct split_ivs_info *si_info = NULL;
641 niter = desc->niter;
643 /* Should not get here (such loop should be peeled instead). */
644 gcc_assert (niter > max_unroll + 1);
646 exit_mod = niter % (max_unroll + 1);
648 wont_exit = sbitmap_alloc (max_unroll + 1);
649 sbitmap_ones (wont_exit);
651 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
652 n_remove_edges = 0;
654 if (flag_split_ivs_in_unroller)
655 si_info = analyze_ivs_to_split (loop);
657 if (!exit_at_end)
659 /* The exit is not at the end of the loop; leave exit test
660 in the first copy, so that the loops that start with test
661 of exit condition have continuous body after unrolling. */
663 if (dump_file)
664 fprintf (dump_file, ";; Condition on beginning of loop.\n");
666 /* Peel exit_mod iterations. */
667 RESET_BIT (wont_exit, 0);
668 if (desc->noloop_assumptions)
669 RESET_BIT (wont_exit, 1);
671 if (exit_mod)
673 si_info_start_duplication (si_info);
674 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
675 loops, exit_mod,
676 wont_exit, desc->out_edge,
677 remove_edges, &n_remove_edges,
678 DLTHE_FLAG_UPDATE_FREQ))
679 abort ();
681 if (si_info && exit_mod > 1)
682 split_ivs_in_copies (si_info, exit_mod, false, false);
684 desc->noloop_assumptions = NULL_RTX;
685 desc->niter -= exit_mod;
686 desc->niter_max -= exit_mod;
689 SET_BIT (wont_exit, 1);
691 else
693 /* Leave exit test in last copy, for the same reason as above if
694 the loop tests the condition at the end of loop body. */
696 if (dump_file)
697 fprintf (dump_file, ";; Condition on end of loop.\n");
699 /* We know that niter >= max_unroll + 2; so we do not need to care of
700 case when we would exit before reaching the loop. So just peel
701 exit_mod + 1 iterations. */
702 if (exit_mod != max_unroll
703 || desc->noloop_assumptions)
705 RESET_BIT (wont_exit, 0);
706 if (desc->noloop_assumptions)
707 RESET_BIT (wont_exit, 1);
709 si_info_start_duplication (si_info);
710 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
711 loops, exit_mod + 1,
712 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
713 DLTHE_FLAG_UPDATE_FREQ))
714 abort ();
716 if (si_info && exit_mod > 0)
717 split_ivs_in_copies (si_info, exit_mod + 1, false, false);
719 desc->niter -= exit_mod + 1;
720 desc->niter_max -= exit_mod + 1;
721 desc->noloop_assumptions = NULL_RTX;
723 SET_BIT (wont_exit, 0);
724 SET_BIT (wont_exit, 1);
727 RESET_BIT (wont_exit, max_unroll);
730 /* Now unroll the loop. */
731 si_info_start_duplication (si_info);
732 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
733 loops, max_unroll,
734 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
735 DLTHE_FLAG_UPDATE_FREQ))
736 abort ();
738 if (si_info)
740 split_ivs_in_copies (si_info, max_unroll, true, true);
741 free_si_info (si_info);
744 free (wont_exit);
746 if (exit_at_end)
748 basic_block exit_block = desc->in_edge->src->rbi->copy;
749 /* Find a new in and out edge; they are in the last copy we have made. */
751 if (exit_block->succ->dest == desc->out_edge->dest)
753 desc->out_edge = exit_block->succ;
754 desc->in_edge = exit_block->succ->succ_next;
756 else
758 desc->out_edge = exit_block->succ->succ_next;
759 desc->in_edge = exit_block->succ;
763 desc->niter /= max_unroll + 1;
764 desc->niter_max /= max_unroll + 1;
765 desc->niter_expr = GEN_INT (desc->niter);
767 /* Remove the edges. */
768 for (i = 0; i < n_remove_edges; i++)
769 remove_path (loops, remove_edges[i]);
770 free (remove_edges);
772 if (dump_file)
773 fprintf (dump_file,
774 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
775 max_unroll, num_loop_insns (loop));
778 /* Decide whether to unroll LOOP iterating runtime computable number of times
779 and how much. */
780 static void
781 decide_unroll_runtime_iterations (struct loop *loop, int flags)
783 unsigned nunroll, nunroll_by_av, i;
784 struct niter_desc *desc;
786 if (!(flags & UAP_UNROLL))
788 /* We were not asked to, just return back silently. */
789 return;
792 if (dump_file)
793 fprintf (dump_file,
794 "\n;; Considering unrolling loop with runtime "
795 "computable number of iterations\n");
797 /* nunroll = total number of copies of the original loop body in
798 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
799 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
800 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
801 if (nunroll > nunroll_by_av)
802 nunroll = nunroll_by_av;
803 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
804 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
806 /* Skip big loops. */
807 if (nunroll <= 1)
809 if (dump_file)
810 fprintf (dump_file, ";; Not considering loop, is too big\n");
811 return;
814 /* Check for simple loops. */
815 desc = get_simple_loop_desc (loop);
817 /* Check simpleness. */
818 if (!desc->simple_p || desc->assumptions)
820 if (dump_file)
821 fprintf (dump_file,
822 ";; Unable to prove that the number of iterations "
823 "can be counted in runtime\n");
824 return;
827 if (desc->const_iter)
829 if (dump_file)
830 fprintf (dump_file, ";; Loop iterates constant times\n");
831 return;
834 /* If we have profile feedback, check whether the loop rolls. */
835 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
837 if (dump_file)
838 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
839 return;
842 /* Success; now force nunroll to be power of 2, as we are unable to
843 cope with overflows in computation of number of iterations. */
844 for (i = 1; 2 * i <= nunroll; i *= 2)
845 continue;
847 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
848 loop->lpt_decision.times = i - 1;
850 if (dump_file)
851 fprintf (dump_file,
852 ";; Decided to unroll the runtime computable "
853 "times rolling loop, %d times.\n",
854 loop->lpt_decision.times);
857 /* Unroll LOOP for that we are able to count number of iterations in runtime
858 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
859 extra care for case n < 0):
861 for (i = 0; i < n; i++)
862 body;
866 i = 0;
867 mod = n % 4;
869 switch (mod)
871 case 3:
872 body; i++;
873 case 2:
874 body; i++;
875 case 1:
876 body; i++;
877 case 0: ;
880 while (i < n)
882 body; i++;
883 body; i++;
884 body; i++;
885 body; i++;
888 static void
889 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
891 rtx old_niter, niter, init_code, branch_code, tmp;
892 unsigned i, j, p;
893 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
894 unsigned n_dom_bbs;
895 sbitmap wont_exit;
896 int may_exit_copy;
897 unsigned n_peel, n_remove_edges;
898 edge *remove_edges, e;
899 bool extra_zero_check, last_may_exit;
900 unsigned max_unroll = loop->lpt_decision.times;
901 struct niter_desc *desc = get_simple_loop_desc (loop);
902 bool exit_at_end = loop_exit_at_end_p (loop);
903 struct split_ivs_info *si_info = NULL;
905 if (flag_split_ivs_in_unroller)
906 si_info = analyze_ivs_to_split (loop);
908 /* Remember blocks whose dominators will have to be updated. */
909 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
910 n_dom_bbs = 0;
912 body = get_loop_body (loop);
913 for (i = 0; i < loop->num_nodes; i++)
915 unsigned nldom;
916 basic_block *ldom;
918 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
919 for (j = 0; j < nldom; j++)
920 if (!flow_bb_inside_loop_p (loop, ldom[j]))
921 dom_bbs[n_dom_bbs++] = ldom[j];
923 free (ldom);
925 free (body);
927 if (!exit_at_end)
929 /* Leave exit in first copy (for explanation why see comment in
930 unroll_loop_constant_iterations). */
931 may_exit_copy = 0;
932 n_peel = max_unroll - 1;
933 extra_zero_check = true;
934 last_may_exit = false;
936 else
938 /* Leave exit in last copy (for explanation why see comment in
939 unroll_loop_constant_iterations). */
940 may_exit_copy = max_unroll;
941 n_peel = max_unroll;
942 extra_zero_check = false;
943 last_may_exit = true;
946 /* Get expression for number of iterations. */
947 start_sequence ();
948 old_niter = niter = gen_reg_rtx (desc->mode);
949 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
950 if (tmp != niter)
951 emit_move_insn (niter, tmp);
953 /* Count modulo by ANDing it with max_unroll; we use the fact that
954 the number of unrollings is a power of two, and thus this is correct
955 even if there is overflow in the computation. */
956 niter = expand_simple_binop (desc->mode, AND,
957 niter,
958 GEN_INT (max_unroll),
959 NULL_RTX, 0, OPTAB_LIB_WIDEN);
961 init_code = get_insns ();
962 end_sequence ();
964 /* Precondition the loop. */
965 loop_split_edge_with (loop_preheader_edge (loop), init_code);
967 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
968 n_remove_edges = 0;
970 wont_exit = sbitmap_alloc (max_unroll + 2);
972 /* Peel the first copy of loop body (almost always we must leave exit test
973 here; the only exception is when we have extra zero check and the number
974 of iterations is reliable. Also record the place of (possible) extra
975 zero check. */
976 sbitmap_zero (wont_exit);
977 if (extra_zero_check
978 && !desc->noloop_assumptions)
979 SET_BIT (wont_exit, 1);
980 ezc_swtch = loop_preheader_edge (loop)->src;
981 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
982 loops, 1,
983 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
984 DLTHE_FLAG_UPDATE_FREQ))
985 abort ();
987 /* Record the place where switch will be built for preconditioning. */
988 swtch = loop_split_edge_with (loop_preheader_edge (loop),
989 NULL_RTX);
991 for (i = 0; i < n_peel; i++)
993 /* Peel the copy. */
994 sbitmap_zero (wont_exit);
995 if (i != n_peel - 1 || !last_may_exit)
996 SET_BIT (wont_exit, 1);
997 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
998 loops, 1,
999 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1000 DLTHE_FLAG_UPDATE_FREQ))
1001 abort ();
1003 /* Create item for switch. */
1004 j = n_peel - i - (extra_zero_check ? 0 : 1);
1005 p = REG_BR_PROB_BASE / (i + 2);
1007 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1008 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1009 block_label (preheader), p, NULL_RTX);
1011 swtch = loop_split_edge_with (swtch->pred, branch_code);
1012 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1013 swtch->succ->probability = REG_BR_PROB_BASE - p;
1014 e = make_edge (swtch, preheader,
1015 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
1016 e->probability = p;
1019 if (extra_zero_check)
1021 /* Add branch for zero iterations. */
1022 p = REG_BR_PROB_BASE / (max_unroll + 1);
1023 swtch = ezc_swtch;
1024 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1025 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1026 block_label (preheader), p, NULL_RTX);
1028 swtch = loop_split_edge_with (swtch->succ, branch_code);
1029 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1030 swtch->succ->probability = REG_BR_PROB_BASE - p;
1031 e = make_edge (swtch, preheader,
1032 swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
1033 e->probability = p;
1036 /* Recount dominators for outer blocks. */
1037 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1039 /* And unroll loop. */
1041 sbitmap_ones (wont_exit);
1042 RESET_BIT (wont_exit, may_exit_copy);
1044 si_info_start_duplication (si_info);
1045 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1046 loops, max_unroll,
1047 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1048 DLTHE_FLAG_UPDATE_FREQ))
1049 abort ();
1051 if (si_info)
1053 split_ivs_in_copies (si_info, max_unroll, true, true);
1054 free_si_info (si_info);
1057 free (wont_exit);
1059 if (exit_at_end)
1061 basic_block exit_block = desc->in_edge->src->rbi->copy;
1062 /* Find a new in and out edge; they are in the last copy we have made. */
1064 if (exit_block->succ->dest == desc->out_edge->dest)
1066 desc->out_edge = exit_block->succ;
1067 desc->in_edge = exit_block->succ->succ_next;
1069 else
1071 desc->out_edge = exit_block->succ->succ_next;
1072 desc->in_edge = exit_block->succ;
1076 /* Remove the edges. */
1077 for (i = 0; i < n_remove_edges; i++)
1078 remove_path (loops, remove_edges[i]);
1079 free (remove_edges);
1081 /* We must be careful when updating the number of iterations due to
1082 preconditioning and the fact that the value must be valid at entry
1083 of the loop. After passing through the above code, we see that
1084 the correct new number of iterations is this: */
1085 gcc_assert (!desc->const_iter);
1086 desc->niter_expr =
1087 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1088 desc->niter_max /= max_unroll + 1;
1089 if (exit_at_end)
1091 desc->niter_expr =
1092 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1093 desc->noloop_assumptions = NULL_RTX;
1094 desc->niter_max--;
1097 if (dump_file)
1098 fprintf (dump_file,
1099 ";; Unrolled loop %d times, counting # of iterations "
1100 "in runtime, %i insns\n",
1101 max_unroll, num_loop_insns (loop));
1104 /* Decide whether to simply peel LOOP and how much. */
1105 static void
1106 decide_peel_simple (struct loop *loop, int flags)
1108 unsigned npeel;
1109 struct niter_desc *desc;
1111 if (!(flags & UAP_PEEL))
1113 /* We were not asked to, just return back silently. */
1114 return;
1117 if (dump_file)
1118 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1120 /* npeel = number of iterations to peel. */
1121 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1122 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1123 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1125 /* Skip big loops. */
1126 if (!npeel)
1128 if (dump_file)
1129 fprintf (dump_file, ";; Not considering loop, is too big\n");
1130 return;
1133 /* Check for simple loops. */
1134 desc = get_simple_loop_desc (loop);
1136 /* Check number of iterations. */
1137 if (desc->simple_p && !desc->assumptions && desc->const_iter)
1139 if (dump_file)
1140 fprintf (dump_file, ";; Loop iterates constant times\n");
1141 return;
1144 /* Do not simply peel loops with branches inside -- it increases number
1145 of mispredicts. */
1146 if (num_loop_branches (loop) > 1)
1148 if (dump_file)
1149 fprintf (dump_file, ";; Not peeling, contains branches\n");
1150 return;
1153 if (loop->header->count)
1155 unsigned niter = expected_loop_iterations (loop);
1156 if (niter + 1 > npeel)
1158 if (dump_file)
1160 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1161 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1162 (HOST_WIDEST_INT) (niter + 1));
1163 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1164 npeel);
1166 return;
1168 npeel = niter + 1;
1170 else
1172 /* For now we have no good heuristics to decide whether loop peeling
1173 will be effective, so disable it. */
1174 if (dump_file)
1175 fprintf (dump_file,
1176 ";; Not peeling loop, no evidence it will be profitable\n");
1177 return;
1180 /* Success. */
1181 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1182 loop->lpt_decision.times = npeel;
1184 if (dump_file)
1185 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1186 loop->lpt_decision.times);
1189 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1190 while (cond)
1191 body;
1195 if (!cond) goto end;
1196 body;
1197 if (!cond) goto end;
1198 body;
1199 while (cond)
1200 body;
1201 end: ;
1203 static void
1204 peel_loop_simple (struct loops *loops, struct loop *loop)
1206 sbitmap wont_exit;
1207 unsigned npeel = loop->lpt_decision.times;
1208 struct niter_desc *desc = get_simple_loop_desc (loop);
1209 struct split_ivs_info *si_info = NULL;
1211 if (flag_split_ivs_in_unroller && npeel > 1)
1212 si_info = analyze_ivs_to_split (loop);
1214 wont_exit = sbitmap_alloc (npeel + 1);
1215 sbitmap_zero (wont_exit);
1217 si_info_start_duplication (si_info);
1218 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1219 loops, npeel, wont_exit, NULL, NULL, NULL,
1220 DLTHE_FLAG_UPDATE_FREQ))
1221 abort ();
1223 free (wont_exit);
1225 if (si_info)
1227 split_ivs_in_copies (si_info, npeel, false, false);
1228 free_si_info (si_info);
1231 if (desc->simple_p)
1233 if (desc->const_iter)
1235 desc->niter -= npeel;
1236 desc->niter_expr = GEN_INT (desc->niter);
1237 desc->noloop_assumptions = NULL_RTX;
1239 else
1241 /* We cannot just update niter_expr, as its value might be clobbered
1242 inside loop. We could handle this by counting the number into
1243 temporary just like we do in runtime unrolling, but it does not
1244 seem worthwhile. */
1245 free_simple_loop_desc (loop);
1248 if (dump_file)
1249 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1252 /* Decide whether to unroll LOOP stupidly and how much. */
1253 static void
1254 decide_unroll_stupid (struct loop *loop, int flags)
1256 unsigned nunroll, nunroll_by_av, i;
1257 struct niter_desc *desc;
1259 if (!(flags & UAP_UNROLL_ALL))
1261 /* We were not asked to, just return back silently. */
1262 return;
1265 if (dump_file)
1266 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1268 /* nunroll = total number of copies of the original loop body in
1269 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1270 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1271 nunroll_by_av
1272 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1273 if (nunroll > nunroll_by_av)
1274 nunroll = nunroll_by_av;
1275 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1276 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1278 /* Skip big loops. */
1279 if (nunroll <= 1)
1281 if (dump_file)
1282 fprintf (dump_file, ";; Not considering loop, is too big\n");
1283 return;
1286 /* Check for simple loops. */
1287 desc = get_simple_loop_desc (loop);
1289 /* Check simpleness. */
1290 if (desc->simple_p && !desc->assumptions)
1292 if (dump_file)
1293 fprintf (dump_file, ";; The loop is simple\n");
1294 return;
1297 /* Do not unroll loops with branches inside -- it increases number
1298 of mispredicts. */
1299 if (num_loop_branches (loop) > 1)
1301 if (dump_file)
1302 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1303 return;
1306 /* If we have profile feedback, check whether the loop rolls. */
1307 if (loop->header->count
1308 && expected_loop_iterations (loop) < 2 * nunroll)
1310 if (dump_file)
1311 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1312 return;
1315 /* Success. Now force nunroll to be power of 2, as it seems that this
1316 improves results (partially because of better alignments, partially
1317 because of some dark magic). */
1318 for (i = 1; 2 * i <= nunroll; i *= 2)
1319 continue;
1321 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1322 loop->lpt_decision.times = i - 1;
1324 if (dump_file)
1325 fprintf (dump_file,
1326 ";; Decided to unroll the loop stupidly, %d times.\n",
1327 loop->lpt_decision.times);
1330 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1331 while (cond)
1332 body;
1336 while (cond)
1338 body;
1339 if (!cond) break;
1340 body;
1341 if (!cond) break;
1342 body;
1343 if (!cond) break;
1344 body;
1347 static void
1348 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1350 sbitmap wont_exit;
1351 unsigned nunroll = loop->lpt_decision.times;
1352 struct niter_desc *desc = get_simple_loop_desc (loop);
1353 struct split_ivs_info *si_info = NULL;
1355 if (flag_split_ivs_in_unroller)
1356 si_info = analyze_ivs_to_split (loop);
1358 wont_exit = sbitmap_alloc (nunroll + 1);
1359 sbitmap_zero (wont_exit);
1361 si_info_start_duplication (si_info);
1362 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1363 loops, nunroll, wont_exit, NULL, NULL, NULL,
1364 DLTHE_FLAG_UPDATE_FREQ))
1365 abort ();
1367 if (si_info)
1369 split_ivs_in_copies (si_info, nunroll, true, true);
1370 free_si_info (si_info);
1373 free (wont_exit);
1375 if (desc->simple_p)
1377 /* We indeed may get here provided that there are nontrivial assumptions
1378 for a loop to be really simple. We could update the counts, but the
1379 problem is that we are unable to decide which exit will be taken
1380 (not really true in case the number of iterations is constant,
1381 but noone will do anything with this information, so we do not
1382 worry about it). */
1383 desc->simple_p = false;
1386 if (dump_file)
1387 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1388 nunroll, num_loop_insns (loop));
1391 /* A hash function for information about insns to split. */
1393 static hashval_t
1394 si_info_hash (const void *ivts)
1396 return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1399 /* An equality functions for information about insns to split. */
1401 static int
1402 si_info_eq (const void *ivts1, const void *ivts2)
1404 const struct iv_to_split *i1 = ivts1;
1405 const struct iv_to_split *i2 = ivts2;
1407 return i1->insn == i2->insn;
1410 /* Determine whether there is an induction variable in INSN that
1411 we would like to split during unrolling. Return NULL if INSN
1412 contains no interesting IVs. Otherwise, allocate an IV_TO_SPLIT
1413 structure, fill it with the relevant information and return a
1414 pointer to it. */
1416 static struct iv_to_split *
1417 analyze_iv_to_split_insn (rtx insn)
1419 rtx set, dest;
1420 struct rtx_iv iv;
1421 struct iv_to_split *ivts;
1423 /* For now we just split the basic induction variables. Later this may be
1424 extended for example by selecting also addresses of memory references. */
1425 set = single_set (insn);
1426 if (!set)
1427 return NULL;
1429 dest = SET_DEST (set);
1430 if (!REG_P (dest))
1431 return NULL;
1433 if (!biv_p (insn, dest))
1434 return NULL;
1436 if (!iv_analyze (insn, dest, &iv))
1437 abort ();
1439 if (iv.step == const0_rtx
1440 || iv.mode != iv.extend_mode)
1441 return NULL;
1443 /* Record the insn to split. */
1444 ivts = xmalloc (sizeof (struct iv_to_split));
1445 ivts->insn = insn;
1446 ivts->base_var = NULL_RTX;
1447 ivts->step = iv.step;
1448 ivts->n_loc = 1;
1449 ivts->loc[0] = 1;
1451 return ivts;
1454 /* Determines which of induction variables in LOOP to split.
1455 Return a SPLIT_IVS_INFO struct with the hash table filled
1456 with all insns to split IVs in. The FIRST_NEW_BLOCK field
1457 is undefined for the return value. */
1459 static struct split_ivs_info *
1460 analyze_ivs_to_split (struct loop *loop)
1462 basic_block *body, bb;
1463 unsigned i;
1464 struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
1465 rtx insn;
1466 struct iv_to_split *ivts;
1467 PTR *slot;
1469 si_info->insns_to_split = htab_create (5 * loop->num_nodes,
1470 si_info_hash, si_info_eq, free);
1472 iv_analysis_loop_init (loop);
1474 body = get_loop_body (loop);
1475 for (i = 0; i < loop->num_nodes; i++)
1477 bb = body[i];
1478 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1479 continue;
1481 FOR_BB_INSNS (bb, insn)
1483 if (!INSN_P (insn))
1484 continue;
1486 ivts = analyze_iv_to_split_insn (insn);
1488 if (!ivts)
1489 continue;
1491 slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
1492 *slot = ivts;
1496 free (body);
1498 return si_info;
1501 /* Called just before loop duplication. Records start of duplicated area
1502 to SI_INFO. */
1504 static void
1505 si_info_start_duplication (struct split_ivs_info *si_info)
1507 if (si_info)
1508 si_info->first_new_block = last_basic_block;
1511 /* Determine the number of iterations between initialization of the base
1512 variable and the current copy (N_COPY). N_COPIES is the total number
1513 of newly created copies. UNROLLING is true if we are unrolling
1514 (not peeling) the loop. */
1516 static unsigned
1517 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1519 if (unrolling)
1521 /* If we are unrolling, initialization is done in the original loop
1522 body (number 0). */
1523 return n_copy;
1525 else
1527 /* If we are peeling, the copy in that the initialization occurs has
1528 number 1. The original loop (number 0) is the last. */
1529 if (n_copy)
1530 return n_copy - 1;
1531 else
1532 return n_copies;
1536 /* Locate in EXPR the expression corresponding to the location recorded
1537 in IVTS, and return a pointer to the RTX for this location. */
1539 static rtx *
1540 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1542 unsigned i;
1543 rtx *ret = &expr;
1545 for (i = 0; i < ivts->n_loc; i++)
1546 ret = &XEXP (*ret, ivts->loc[i]);
1548 return ret;
1551 /* Allocate basic variable for the induction variable chain. Callback for
1552 htab_traverse. */
1554 static int
1555 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1557 struct iv_to_split *ivts = *slot;
1558 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1560 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1562 return 1;
1565 /* Insert initialization of basic variable of IVTS before INSN, taking
1566 the initial value from INSN. */
1568 static void
1569 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1571 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1572 rtx seq;
1574 start_sequence ();
1575 expr = force_operand (expr, ivts->base_var);
1576 if (expr != ivts->base_var)
1577 emit_move_insn (ivts->base_var, expr);
1578 seq = get_insns ();
1579 end_sequence ();
1581 emit_insn_before (seq, insn);
1584 /* Replace the use of induction variable described in IVTS in INSN
1585 by base variable + DELTA * step. */
1587 static void
1588 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1590 rtx expr, *loc, seq, incr, var;
1591 enum machine_mode mode = GET_MODE (ivts->base_var);
1592 rtx src, dest, set;
1594 /* Construct base + DELTA * step. */
1595 if (!delta)
1596 expr = ivts->base_var;
1597 else
1599 incr = simplify_gen_binary (MULT, mode,
1600 ivts->step, gen_int_mode (delta, mode));
1601 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1602 ivts->base_var, incr);
1605 /* Figure out where to do the replacement. */
1606 loc = get_ivts_expr (single_set (insn), ivts);
1608 /* If we can make the replacement right away, we're done. */
1609 if (validate_change (insn, loc, expr, 0))
1610 return;
1612 /* Otherwise, force EXPR into a register and try again. */
1613 start_sequence ();
1614 var = gen_reg_rtx (mode);
1615 expr = force_operand (expr, var);
1616 if (expr != var)
1617 emit_move_insn (var, expr);
1618 seq = get_insns ();
1619 end_sequence ();
1620 emit_insn_before (seq, insn);
1622 if (validate_change (insn, loc, var, 0))
1623 return;
1625 /* The last chance. Try recreating the assignment in insn
1626 completely from scratch. */
1627 set = single_set (insn);
1628 gcc_assert (set);
1630 start_sequence ();
1631 *loc = var;
1632 src = copy_rtx (SET_SRC (set));
1633 dest = copy_rtx (SET_DEST (set));
1634 src = force_operand (src, dest);
1635 if (src != dest)
1636 emit_move_insn (dest, src);
1637 seq = get_insns ();
1638 end_sequence ();
1640 emit_insn_before (seq, insn);
1641 delete_insn (insn);
1644 /* Splits induction variables (that are marked in SI_INFO) in copies of loop.
1645 I.e. replace
1647 i = i + 1;
1649 i = i + 1;
1651 i = i + 1;
1654 type chains by
1656 i0 = i + 1
1658 i = i0 + 1
1660 i = i0 + 2
1663 UNROLLING is true if we unrolled (not peeled) the loop.
1664 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1665 the loop (as it should happen in complete unrolling, but not in ordinary
1666 peeling of the loop). */
1668 static void
1669 split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
1670 bool unrolling, bool rewrite_original_loop)
1672 unsigned i, delta;
1673 basic_block bb, orig_bb;
1674 rtx insn, orig_insn, next;
1675 struct iv_to_split ivts_templ, *ivts;
1677 /* Sanity check -- we need to put initialization in the original loop
1678 body. */
1679 gcc_assert (!unrolling || rewrite_original_loop);
1681 /* Allocate the basic variables (i0). */
1682 htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
1684 for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1686 bb = BASIC_BLOCK (i);
1687 orig_bb = bb->rbi->original;
1689 delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
1690 unrolling);
1691 orig_insn = BB_HEAD (orig_bb);
1692 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
1694 next = NEXT_INSN (insn);
1695 if (!INSN_P (insn))
1696 continue;
1698 while (!INSN_P (orig_insn))
1699 orig_insn = NEXT_INSN (orig_insn);
1701 ivts_templ.insn = orig_insn;
1702 ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1703 if (ivts)
1706 #ifdef ENABLE_CHECKING
1707 if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
1708 abort ();
1709 #endif
1711 if (!delta)
1712 insert_base_initialization (ivts, insn);
1713 split_iv (ivts, insn, delta);
1715 orig_insn = NEXT_INSN (orig_insn);
1719 if (!rewrite_original_loop)
1720 return;
1722 /* Rewrite also the original loop body. Find them as originals of the blocks
1723 in the last copied iteration, i.e. those that have
1724 bb->rbi->original->copy == bb. */
1725 for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
1727 bb = BASIC_BLOCK (i);
1728 orig_bb = bb->rbi->original;
1729 if (orig_bb->rbi->copy != bb)
1730 continue;
1732 delta = determine_split_iv_delta (0, n_copies, unrolling);
1733 for (orig_insn = BB_HEAD (orig_bb);
1734 orig_insn != NEXT_INSN (BB_END (bb));
1735 orig_insn = next)
1737 next = NEXT_INSN (orig_insn);
1739 if (!INSN_P (orig_insn))
1740 continue;
1742 ivts_templ.insn = orig_insn;
1743 ivts = htab_find (si_info->insns_to_split, &ivts_templ);
1744 if (!ivts)
1745 continue;
1747 if (!delta)
1748 insert_base_initialization (ivts, orig_insn);
1749 split_iv (ivts, orig_insn, delta);
1754 /* Release SI_INFO. */
1756 static void
1757 free_si_info (struct split_ivs_info *si_info)
1759 htab_delete (si_info->insns_to_split);
1760 free (si_info);