/* Loop unrolling.
   Copyright (C) 2002-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "hash-table.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, from better code to optimize for further passes, and in some
   cases from fewer tests of exit conditions.  The main problem is code
   growth, which impacts performance negatively due to cache effects.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code

   For more detailed descriptions of each of those, see comments at
   the appropriate functions below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance by several percent.  */
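/* (Illustrative note, not part of the original sources: the limits
   consulted below are PARAM_MAX_UNROLLED_INSNS,
   PARAM_MAX_AVERAGE_UNROLLED_INSNS and PARAM_MAX_UNROLL_TIMES, which
   correspond to the command-line knobs --param max-unrolled-insns,
   --param max-average-unrolled-insns and --param max-unroll-times.)  */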
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;		/* The insn in which the induction variable
				   occurs.  */
  rtx orig_var;			/* The variable (register) for the IV before
				   split.  */
  rtx base_var;			/* The variable on which the values in the
				   further iterations are based.  */
  rtx step;			/* Step of the induction variable.  */
  struct iv_to_split *next;	/* Next entry in walking order.  */
};
/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;		/* The insn in which the variable expansion
				   occurs.  */
  rtx reg;			/* The accumulator which is expanded.  */
  vec<rtx> var_expansions;	/* The copies of the accumulator which is
				   expanded.  */
  struct var_to_expand *next;	/* Next entry in walking order.  */
  enum rtx_code op;		/* The type of the accumulation - addition,
				   subtraction or multiplication.  */
  int expansion_count;		/* Count the number of expansions generated
				   so far.  */
  int reuse_expansion;		/* The expansion we intend to reuse to expand
				   the accumulator.  If REUSE_EXPANSION is 0
				   reuse the original accumulator.  Else use
				   var_expansions[REUSE_EXPANSION - 1].  */
};
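/* For instance (illustrative sketch): in

     for (i = 0; i < n; i++)
       sum += a[i];

   SUM is such an accumulator; each unrolled copy can get its own
   register sum1, sum2, ..., which are combined again at the loop
   exit, breaking the dependency chain between the copies.  */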
/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : typed_free_remove <iv_to_split>
{
  typedef iv_to_split value_type;
  typedef iv_to_split compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const value_type *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}
/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}
/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : typed_free_remove <var_to_expand>
{
  typedef var_to_expand value_type;
  typedef var_to_expand compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const value_type *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}
/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}
/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head;	/* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the
					    list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the
						list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop *,
							 rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);
/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
                   "loop unrolled %d times",
                   loop->lpt_decision.times);
  if (profile_info)
    dump_printf (report_flags,
                 " (header execution count %d)",
                 (int)loop->header->count);

  dump_printf (report_flags, "\n");
}
/* Decide whether to unroll loops and how much.  */

static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (TDF_RTL, locus,
			 ";; *** Considering loop %d at BB %d for "
			 "unrolling ***\n",
			 loop->num, loop->header->index);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
/* Unroll LOOPS.  */

void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }
}
/* Check whether the exit of LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}
/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in the case the loop has more
     than one exit it may well loop less than the determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of the loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
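  /* Worked example (illustrative, numbers not from the original sources):
     for desc->niter == 102 with the exit test at the beginning of the
     body, unrolling 3x costs n_copies == 0 + 2 + 1 copies (102 % 3 == 0,
     so nothing is peeled), while unrolling 4x costs 2 + 3 + 1 copies
     because 102 % 4 == 2 iterations must be peeled.  The loop below
     picks the I minimizing this count.  */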
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
/* Unroll LOOP with a constant number of iterations
   LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)
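   (expansion reconstructed for illustration; 102 % 4 == 2 iterations
   are peeled, then four copies of the body roll 25 times)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */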
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
	 in the first copy, so that the loops that start with test
	 of exit condition have continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
	 the loop tests the condition at the end of loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
	 the case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);
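  /* E.g. (illustrative): a loop that entered this function with
     desc->niter == 102 and was unrolled 4x after peeling 2 iterations
     ends up with desc->niter == 100 / 4 == 25.  */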
  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP iterating a runtime computable number of
   times and how much.  */

static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in computation of the number of iterations.  */
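  /* E.g. (illustrative): nunroll == 6 is rounded down to an unroll
     factor of 4 (I ends at 4, so lpt_decision.times == 3); the modulo
     of the runtime iteration count can then be taken by masking with 3,
     as done in unroll_loop_runtime_iterations below.  */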
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do a comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it up.  */

  return bb;
}
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label,
		      int prob, rtx_insn *cinsn)
{
  rtx_insn *seq, *jump;
  rtx cond;
  enum machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      jump = get_last_insn ();
      gcc_assert (JUMP_P (jump));
      JUMP_LABEL (jump) = label;
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
/* Unroll LOOP, for which we are able to count the number of iterations at
   runtime, LOOP->LPT_DECISION.TIMES times.  The transformation does this
   (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)
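   (expansion reconstructed for illustration)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
	 body; i++;
       case 2:
	 body; i++;
       case 1:
	 body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */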
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
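  /* E.g. (illustrative): with max_unroll == 3 the unroll factor is 4,
     and niter & 3 == niter % 4 holds even if the computation of NITER
     wrapped around, precisely because 4 is a power of two.  */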
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable.  Also record the place of (possible) extra
     zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP stupidly and how much.  */

static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)
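   (expansion reconstructed for illustration)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
  */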
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial
	 assumptions for a loop to be really simple.  We could update the
	 counts, but the problem is that we are unable to decide which
	 exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}
/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  (*debug_uses)++;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   Return NULL if INSN contains no opportunity for expansion of accumulator.
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
   information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we can not
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using (which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation) will yield wrong
	 results for (x = something - x); thus avoid using it in that
	 case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;

  return ves;
}
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.
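
   For example (sketch reconstructed for illustration), chains like

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;

   are replaced by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2

   so the unrolled copies no longer form one long dependency chain.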

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return
   a pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (!ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determines which of the insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2
		= opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}
/* Called just before loop duplication.  Records the start of the duplicated
   area to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
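/* E.g. (illustrative): when peeling with n_copies == 3, copy #1 (where
   the base variable is initialized) gets delta 0, copy #2 gets 1,
   copy #3 gets 2, and the original body (#0), which runs last,
   gets 3.  */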
/* Allocate the basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}
/* Insert initialization of the basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}
/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  enum machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}
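/* E.g. (illustrative): for an IV with step 4 in copy DELTA == 2, a use
   of the IV in INSN is rewritten to BASE_VAR + 8.  */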
/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}
/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */
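/* (Illustrative note: under IEEE 754, -0.0 + -0.0 == -0.0 while
   +0.0 + -0.0 == +0.0, which is why the PLUS/MINUS initializer below
   uses a negated zero when signed zeros must be honored.)  */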
static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  enum machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode),
					    mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (!note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}
/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body
   of the loop (as it should happen in complete unrolling, but not in
   ordinary peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_INSN_P (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_INSN_P (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the originals of the blocks
     in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}
/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}