/* Loop unrolling and peeling.
   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "params.h"
#include "output.h"
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
#include "plugin.h"

/* This pass performs loop unrolling and peeling.  We only perform these
   optimizations on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, better opportunities for further passes and, in some cases,
   from fewer tests of exit conditions.  The main problem is code growth,
   which impacts performance negatively due to cache effects.

   What we do:

   -- complete peeling of once-rolling loops; this is the above mentioned
      exception, as this causes the loop to be cancelled completely and
      does not cause code growth
   -- complete peeling of loops that roll a (small) constant number of times
   -- simple peeling of first iterations of loops that do not roll much
      (according to profile feedback)
   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of exit condition tests
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code

   For more detailed descriptions of each of those, see the comments at the
   appropriate function below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll/peel.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several
   percent.  */
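
/* To make the size limits concrete, here is the arithmetic that the
   decide_unroll_* routines below perform, with illustrative parameter
   values (chosen for the example, not the built-in defaults): given
   --param max-unrolled-insns=200, --param max-average-unrolled-insns=80
   and --param max-unroll-times=8, a loop body of 24 insns that executes
   20 insns in an average iteration starts from

     nunroll = MIN (200 / 24, 80 / 20, 8) = MIN (8, 4, 8) = 4,

   i.e. at most 4 copies of the body are considered for this loop.  */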

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx insn;              /* The insn in which the induction variable occurs.  */
  rtx base_var;          /* The variable on which the values in the further
                            iterations are based.  */
  rtx step;              /* Step of the induction variable.  */
  struct iv_to_split *next;  /* Next entry in walking order.  */
  unsigned n_loc;
  unsigned loc[3];       /* Location where the definition of the induction
                            variable occurs in the insn.  For example if
                            N_LOC is 2, the expression is located at
                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx insn;                        /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;      /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
  unsigned accum_pos;              /* The position in which the accumulator is placed in
                                      the insn src.  For example in x = x + something
                                      accum_pos is 0 while in x = something + x accum_pos
                                      is 1.  */
};

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
  struct iv_to_split *iv_to_split_head;   /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail;  /* Pointer to the tail of the list.  */
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
                                      to expand.  */
  struct var_to_expand *var_to_expand_head;   /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail;  /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};

static void decide_unrolling_and_peeling (int);
static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
static void decide_peel_once_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void peel_loop_simple (struct loop *);
static void peel_loop_completely (struct loop *);
static void unroll_loop_stupid (struct loop *);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
void
unroll_and_peel_loops (int flags)
{
  struct loop *loop;
  bool check;
  loop_iterator li;

  /* First perform complete loop peeling (it is almost surely a win,
     and affects parameters for further decision a lot).  */
  peel_loops_completely (flags);

  /* Now decide the rest of the unrolling and peeling.  */
  decide_unrolling_and_peeling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      check = true;
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_PEEL_COMPLETELY:
	  /* Already done.  */
	  gcc_unreachable ();
	case LPT_PEEL_SIMPLE:
	  peel_loop_simple (loop);
	  break;
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  break;
	case LPT_NONE:
	  check = false;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (check)
	{
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }

  iv_analysis_done ();
}

/* Check whether exit of the LOOP is at the end of loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx insn;

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn))
	return false;
    }

  return true;
}

/* Depending on FLAGS, check whether to peel loops completely and do so.  */
static void
peel_loops_completely (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* Scan the loops, the inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file,
		 "\n;; *** Considering loop %d for complete peeling ***\n",
		 loop->num);

      loop->ninsns = num_loop_insns (loop);

      decide_peel_once_rolling (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_completely (loop, flags);

      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
	{
	  peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }
}

/* Decide whether to unroll or peel loops (depending on FLAGS) and how much.  */
static void
decide_unrolling_and_peeling (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* ICI: parameter holders.  */

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  static bool ici_optimize_loop_for_size_p;
	  ici_optimize_loop_for_size_p = 1;
	  invoke_plugin_va_callbacks
	    (PLUGIN_UNROLL_PARAMETER_HANDLER,
	     "_loop.id", EP_INT, &(loop->num),
	     "loop.optimize_loop_for_size_p", EP_UNSIGNED_CHAR,
	     &ici_optimize_loop_for_size_p, NULL);

	  if (ici_optimize_loop_for_size_p)
	    {
	      if (dump_file)
		fprintf (dump_file, ";; Not considering loop, cold area\n");
	      continue;
	    }
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  static bool ici_can_duplicate_loop_p;
	  ici_can_duplicate_loop_p = 0;
	  invoke_plugin_va_callbacks
	    (PLUGIN_UNROLL_PARAMETER_HANDLER,
	     "_loop.id", EP_INT, &(loop->num),
	     "_loop.can_duplicate_loop_p", EP_UNSIGNED_CHAR,
	     &ici_can_duplicate_loop_p, NULL);

	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  static bool ici_is_inner_most;
	  ici_is_inner_most = 0;
	  invoke_plugin_va_callbacks
	    (PLUGIN_UNROLL_PARAMETER_HANDLER,
	     "_loop.id", EP_INT, &(loop->num),
	     "_loop.is_inner_most", EP_UNSIGNED_CHAR, &ici_is_inner_most,
	     NULL);

	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_simple (loop, flags);

      invoke_plugin_va_callbacks
	(PLUGIN_UNROLL_PARAMETER_HANDLER,
	 "_loop.id", EP_INT, &(loop->num),
	 /* ICI: Number of loop insns.  */
	 "_loop.ninsns", EP_UNSIGNED, &(loop->ninsns),
	 /* ICI: Average number of executed insns per iteration.  */
	 "_loop.av_ninsns", EP_UNSIGNED, &(loop->av_ninsns),
	 /* ICI: Number of blocks contained within the loop.  */
	 "_loop.num_nodes", EP_UNSIGNED, &(loop->num_nodes),
	 /* ICI: Loop unrolling/peeling decision.  */
	 "loop.lpt_decision.times", EP_UNSIGNED, &(loop->lpt_decision.times),
	 "loop.lpt_decision.decision", EP_INT, /* Enum treated as int.  */
	 &(loop->lpt_decision.decision), NULL);
    }
}
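
/* ICI: each invoke_plugin_va_callbacks call above passes its parameters
   as ("name", EP_type, pointer) triples terminated by a NULL name.  A
   plugin may write back through these pointers; in particular the values
   behind "loop.lpt_decision.times" and "loop.lpt_decision.decision" are
   read again by unroll_and_peel_loops, so a plugin can override the
   heuristic decision, and "loop.optimize_loop_for_size_p" lets it
   override the cold-area check.  The fragment below, excluded from
   compilation, is a minimal sketch of how a plugin-side handler might
   walk such an argument list.  The va_list-based handler signature and
   the registration (not shown) are assumptions made for illustration;
   only the triple convention and the parameter names come from the
   calls above.  */
#if 0
static void
example_unroll_parameter_handler (va_list ap)
{
  const char *name;

  while ((name = va_arg (ap, const char *)) != NULL)
    {
      int type = va_arg (ap, int);      /* EP_INT, EP_UNSIGNED, ...  */
      void *value = va_arg (ap, void *);

      /* Hypothetical policy: request four copies of the loop body.  A
	 real plugin would also inspect "loop.lpt_decision.decision"
	 before changing the unroll count.  */
      if (type == EP_UNSIGNED
	  && strcmp (name, "loop.lpt_decision.times") == 0)
	*(unsigned *) value = 3;        /* TIMES + 1 == 4 copies.  */
    }
}
#endif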

/* Decide whether the LOOP is once rolling and suitable for complete
   peeling.  */
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");

  /* Is the loop small enough?  */
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite
      || !desc->const_iter
      || desc->niter != 0)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop rolls exactly once\n");
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}

/* Decide whether the LOOP is suitable for complete peeling.  */
static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling completely\n");

  /* Skip non-innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
      return;
    }

  /* Do not peel cold areas.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, cold area\n");
      return;
    }

  /* Can the loop be manipulated?  */
  if (!can_duplicate_loop_p (loop))
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Not considering loop, cannot duplicate\n");
      return;
    }

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);

  /* Is the loop small enough?  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || !desc->const_iter
      || desc->infinite)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  if (desc->niter > npeel - 1)
    {
      if (dump_file)
	{
	  fprintf (dump_file,
		   ";; Not peeling loop completely, rolls too much (");
	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
	}
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel loop completely\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}

/* Peel all iterations of LOOP, remove exit edges and cancel the loop
   completely.  The transformation done:

   for (i = 0; i < 4; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   body; i++;
   body; i++;
   */
static void
peel_loop_completely (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned HOST_WIDE_INT npeel;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge ein;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;

  npeel = desc->niter;

  if (npeel)
    {
      bool ok;

      wont_exit = sbitmap_alloc (npeel + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      remove_edges = NULL;

      if (flag_split_ivs_in_unroller)
	opt_info = analyze_insns_in_loop (loop);

      opt_info_start_duplication (opt_info);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  npeel,
					  wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ
					  | DLTHE_FLAG_COMPLETTE_PEEL
					  | (opt_info
					     ? DLTHE_RECORD_COPY_NUMBER : 0));
      gcc_assert (ok);

      free (wont_exit);

      if (opt_info)
	{
	  apply_opt_in_copies (opt_info, npeel, false, true);
	  free_opt_info (opt_info);
	}

      /* Remove the exit edges.  */
      for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
	remove_path (ein);
      VEC_free (edge, heap, remove_edges);
    }

  ein = desc->in_edge;
  free_simple_loop_desc (loop);

  /* Now remove the unreachable part of the last iteration and cancel
     the loop.  */
  remove_path (ein);

  if (dump_file)
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.  */
  if (desc->niter < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of the loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  if (dump_file)
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
	     best_unroll + 1, best_copies, nunroll);

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  sbitmap_ones (wont_exit);

  remove_edges = NULL;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
	 in the first copy, so that the loops that start with test
	 of exit condition have continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  desc->niter_max -= exit_mod;
	}

      SET_BIT (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
	 the loop tests the condition at the end of loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
	 case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  RESET_BIT (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    RESET_BIT (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  desc->niter_max -= exit_mod + 1;
	  desc->noloop_assumptions = NULL_RTX;

	  SET_BIT (wont_exit, 0);
	  SET_BIT (wont_exit, 1);
	}

      RESET_BIT (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  desc->niter_max /= max_unroll + 1;
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
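
  /* For instance, nunroll == 6 ends the loop above with i == 4, the
     largest power of two not exceeding nunroll, i.e. an unroll factor
     of 4 (lpt_decision.times == 3).  Illustrative values only.  */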

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the runtime computable "
	     "times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info call in loop_optimizer_finalize would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this
   (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, init_code, branch_code, tmp;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  VEC (basic_block, heap) *dom_bbs;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  VEC (edge, heap) *remove_edges;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  dom_bbs = NULL;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      VEC (basic_block, heap) *ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
	if (!flow_bb_inside_loop_p (loop, bb))
	  VEC_safe_push (basic_block, heap, dom_bbs, bb);

      VEC_free (basic_block, heap, ldom);
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter,
			       GEN_INT (max_unroll),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  remove_edges = NULL;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable).  Also record the place of (possible) extra
     zero check.  */
  sbitmap_zero (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    SET_BIT (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      sbitmap_zero (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	SET_BIT (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL_RTX);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL_RTX);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  sbitmap_ones (wont_exit);
  RESET_BIT (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 GEN_INT (max_unroll + 1));
  desc->niter_max /= max_unroll + 1;
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      desc->niter_max--;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));

  VEC_free (basic_block, heap, dom_bbs);
}

/* Decide whether to simply peel LOOP and how much.  */
static void
decide_peel_simple (struct loop *loop, int flags)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (!(flags & UAP_PEEL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);

  /* Skip big loops.  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Do not simply peel loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not peeling, contains branches\n");
      return;
    }

  if (loop->header->count)
    {
      unsigned niter = expected_loop_iterations (loop);
      if (niter + 1 > npeel)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
		       (HOST_WIDEST_INT) (niter + 1));
	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
		       npeel);
	    }
	  return;
	}
      npeel = niter + 1;
    }
  else
    {
      /* For now we have no good heuristics to decide whether loop peeling
	 will be effective, so disable it.  */
      if (dump_file)
	fprintf (dump_file,
		 ";; Not peeling loop, no evidence it will be profitable\n");
      return;
    }

  /* Success.  */
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
  loop->lpt_decision.times = npeel;

  if (dump_file)
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   if (!cond) goto end;
   body;
   if (!cond) goto end;
   body;
   while (cond)
     body;
   end: ;
   */
static void
peel_loop_simple (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned npeel = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller && npeel > 1)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (npeel + 1);
  sbitmap_zero (wont_exit);

  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      npeel, wont_exit, NULL,
				      NULL, DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  free (wont_exit);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, npeel, false, false);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      if (desc->const_iter)
	{
	  desc->niter -= npeel;
	  desc->niter_expr = GEN_INT (desc->niter);
	  desc->noloop_assumptions = NULL_RTX;
	}
      else
	{
	  /* We cannot just update niter_expr, as its value might be clobbered
	     inside loop.  We could handle this by counting the number into
	     temporary just like we do in runtime unrolling, but it does not
	     seem worthwhile.  */
	  free_simple_loop_desc (loop);
	}
    }

  if (dump_file)
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count
      && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the loop stupidly, %d times.\n",
	     loop->lpt_decision.times);
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  sbitmap_zero (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
	 for a loop to be really simple.  We could update the counts, but the
	 problem is that we are unable to decide which exit will be taken
	 (not really true in case the number of iterations is constant,
	 but no one will do anything with this information, so we do not
	 worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* A hash function for information about insns to split.  */

static hashval_t
si_info_hash (const void *ivts)
{
  return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
}

/* An equality function for information about insns to split.  */

static int
si_info_eq (const void *ivts1, const void *ivts2)
{
  const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
  const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;

  return i1->insn == i2->insn;
}

/* Return a hash for VES, which is really a "var_to_expand *".  */

static hashval_t
ve_info_hash (const void *ves)
{
  return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
}

/* Return true if IVTS1 and IVTS2 (which are really both of type
   "var_to_expand *") refer to the same instruction.  */

static int
ve_info_eq (const void *ivts1, const void *ivts2)
{
  const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
  const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;

  return i1->insn == i2->insn;
}

/* Returns true if REG is referenced in exactly one insn in LOOP.  */

bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (rtx_referenced_p (reg, insn))
	  count_ref++;
    }
  return (count_ref == 1);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it with
   the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
  rtx set, dest, src, op1, op2, something;
  struct var_to_expand *ves;
  enum machine_mode mode1, mode2;
  unsigned accum_pos;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);

  if (GET_CODE (src) != PLUS
      && GET_CODE (src) != MINUS
      && GET_CODE (src) != MULT)
    return NULL;

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we cannot perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
    return NULL;

  op1 = XEXP (src, 0);
  op2 = XEXP (src, 1);

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  if (rtx_equal_p (dest, op1))
    accum_pos = 0;
  else if (rtx_equal_p (dest, op2))
    accum_pos = 1;
  else
    return NULL;

  /* The method of expansion that we are using (initializing the expansions
     with zero and summing them at the end of the computation) would yield
     wrong results for (x = something - x); avoid using it in that case.  */
  if (accum_pos == 1
      && GET_CODE (src) == MINUS)
    return NULL;

  something = (accum_pos == 0) ? op2 : op1;

  if (!referenced_in_one_insn_in_loop_p (loop, dest))
    return NULL;

  if (rtx_referenced_p (dest, something))
    return NULL;

  mode1 = GET_MODE (dest);
  mode2 = GET_MODE (something);
  if ((FLOAT_MODE_P (mode1)
       || FLOAT_MODE_P (mode2))
      && !flag_associative_math)
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file,
	       "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  ves->accum_pos = accum_pos;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;
  ivts->n_loc = 1;
  ivts->loc[0] = 1;

  return ivts;
}

/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  PTR *slot1;
  PTR *slot2;
  VEC (edge, heap) *edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
					      si_info_hash, si_info_eq, free);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (VEC_length (edge, edges) == 1)
    {
      exit = VEC_index (edge, edges, 0);
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
							ve_info_hash,
							ve_info_eq, free);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  VEC_free (edge, heap, edges);
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block;
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}

/* Locate in EXPR the expression corresponding to the location recorded
   in IVTS, and return a pointer to the RTX for this location.  */

static rtx *
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
{
  unsigned i;
  rtx *ret = &expr;

  for (i = 0; i < ivts->n_loc; i++)
    ret = &XEXP (*ret, ivts->loc[i]);

  return ret;
}

/* Allocate basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
{
  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
  rtx seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
{
  rtx expr, *loc, seq, incr, var;
  enum machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = get_ivts_expr (single_set (insn), ivts);

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}

/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);

  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
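
/* Illustrative reuse order (the state below is hypothetical): with two
   expansions {sum1, sum2} in VE->var_expansions, successive calls
   return VE->reg, sum1, sum2, VE->reg, sum1, ... -- the accumulators
   are handed out round-robin once the limit on new expansions has been
   reached.  */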

/* Given INSN, replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_change (insn, &SET_DEST (set), new_reg, 1);
  validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);

  if (apply_change_group ())
    if (really_new_expansion)
      {
        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
        ve->expansion_count++;
      }
}
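
/* Illustrative rewrite (pseudo source, hypothetical names): an
   accumulation "sum = sum + x" in a copy becomes "sum1 = sum1 + x".
   Both the SET_DEST and the accumulator use at VE->accum_pos are
   redirected to the same new register, so each copy accumulates into a
   private register that is folded back at the loop exit.  */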

/* Initialize the variable expansions in the loop preheader.  PLACE is
   the loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

     for (i = 0 ; i < n; i++)
       sum += something;

   ==>

     sum += something
     ....
     i = i+1;
     sum1 += something
     ....
     i = i+1
     sum2 += something;
     ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero; thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
                                     basic_block place)
{
  rtx seq, var, zero_init, insn;
  unsigned i;
  enum machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (VEC_length (rtx, ve->var_expansions) == 0)
    return;

  start_sequence ();
  if (ve->op == PLUS || ve->op == MINUS)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        if (honor_signed_zero_p)
          zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
        else
          zero_init = CONST0_RTX (mode);

        emit_move_insn (var, zero_init);
      }
  else if (ve->op == MULT)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        zero_init = CONST1_RTX (GET_MODE (var));
        emit_move_insn (var, zero_init);
      }

  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
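
/* Sketch of what the emitted preheader code amounts to at source level
   (hypothetical names, illustrative only):

     sum1 = -0.0;  sum2 = -0.0;    for PLUS/MINUS when -0.0 matters,
     sum1 = 0;     sum2 = 0;       for PLUS/MINUS otherwise,
     sum1 = 1;     sum2 = 1;       for MULT.

   IEEE addition gives -0.0 + -0.0 == -0.0 but +0.0 + -0.0 == +0.0, so
   a +0 initializer could flip the sign of an all-negative-zero sum.  */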

/* Combine the variable expansions at the loop exit.  PLACE is the
   loop-exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, seq, var, insn;
  unsigned i;

  if (VEC_length (rtx, ve->var_expansions) == 0)
    return;

  start_sequence ();
  if (ve->op == PLUS || ve->op == MINUS)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
                                 var, sum);
  else if (ve->op == MULT)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
                                 var, sum);

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
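
/* Sketch of the exit code this emits (pseudo source, hypothetical
   names): for PLUS/MINUS, "sum = sum2 + (sum1 + sum)"; for MULT,
   "prod = prod2 * (prod1 * prod)".  Each recorded expansion is folded
   back into VE->reg, so the original accumulator again holds the full
   result after the loop.  */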

/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
   of the loop (as it should happen in complete unrolling, but not in
   ordinary peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx insn, orig_insn, next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t) bb->aux, n_copies,
                                        unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
        {
          next = NEXT_INSN (insn);
          if (!INSN_P (insn))
            continue;

          while (!INSN_P (orig_insn))
            orig_insn = NEXT_INSN (orig_insn);

          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              ivts = (struct iv_to_split *)
                htab_find (opt_info->insns_to_split, &ivts_templ);

              if (ivts)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));

                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = (struct var_to_expand *)
                htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
              if (ves)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the original blocks as the
     originals of the blocks in the last copied iteration, i.e. those that
     have get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
        continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
            continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              ivts = (struct iv_to_split *)
                htab_find (opt_info->insns_to_split, &ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }
        }
    }
}
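
/* End-to-end sketch of what the two optimizations achieve for a 2x
   unroll (pseudo source, hypothetical names; the real transformation
   works on RTL):

     for (i = 0; i < n; i++)         i0 = i;               (base init)
       sum += a[i];            ==>   sum1 = 0;             (preheader)
                                     loop:
                                       sum  += a[i0];
                                       sum1 += a[i0 + 1];
                                       i0 += 2;
                                     sum += sum1;          (loop exit)

   Splitting the iv removes the serial i -> i+1 chain between the copies
   and expanding the accumulator removes the serial sum -> sum chain.  */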

/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  if (opt_info->insns_to_split)
    htab_delete (opt_info->insns_to_split);
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        VEC_free (rtx, heap, ves->var_expansions);
      htab_delete (opt_info->insns_with_var_to_expand);
    }
  free (opt_info);
}