1 /* Natural loop analysis code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
31 /* Needed for doloop_condition_get(). */
34 struct unmark_altered_insn_data
;
35 static void unmark_altered (rtx
, rtx
, regset
);
36 static void blocks_invariant_registers (basic_block
*, int, regset
);
37 static void unmark_altered_insn (rtx
, rtx
, struct unmark_altered_insn_data
*);
38 static void blocks_single_set_registers (basic_block
*, int, rtx
*);
39 static int invariant_rtx_wrto_regs_p_helper (rtx
*, regset
);
40 static bool invariant_rtx_wrto_regs_p (rtx
, regset
);
41 static rtx
test_for_iteration (struct loop_desc
*desc
, unsigned HOST_WIDE_INT
);
42 static bool constant_iterations (struct loop_desc
*, unsigned HOST_WIDE_INT
*,
44 static bool simple_loop_exit_p (struct loop
*, edge
, regset
,
45 rtx
*, struct loop_desc
*);
46 static rtx
variable_initial_value (rtx
, regset
, rtx
, rtx
*, enum machine_mode
);
47 static rtx
variable_initial_values (edge
, rtx
, enum machine_mode
);
48 static bool simple_condition_p (struct loop
*, rtx
, regset
,
50 static basic_block
simple_increment (struct loop
*, rtx
*, struct loop_desc
*);
51 static rtx
count_strange_loop_iterations (rtx
, rtx
, enum rtx_code
,
52 int, rtx
, enum machine_mode
,
54 static unsigned HOST_WIDEST_INT
inverse (unsigned HOST_WIDEST_INT
, int);
55 static bool fits_in_mode_p (enum machine_mode mode
, rtx expr
);
57 /* Computes inverse to X modulo (1 << MOD). */
58 static unsigned HOST_WIDEST_INT
59 inverse (unsigned HOST_WIDEST_INT x
, int mod
)
61 unsigned HOST_WIDEST_INT mask
=
62 ((unsigned HOST_WIDEST_INT
) 1 << (mod
- 1) << 1) - 1;
63 unsigned HOST_WIDEST_INT rslt
= 1;
66 for (i
= 0; i
< mod
- 1; i
++)
68 rslt
= (rslt
* x
) & mask
;
75 /* Checks whether BB is executed exactly once in each LOOP iteration. */
77 just_once_each_iteration_p (struct loop
*loop
, basic_block bb
)
79 /* It must be executed at least once each iteration. */
80 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
))
84 if (bb
->loop_father
!= loop
)
87 /* But this was not enough. We might have some irreducible loop here. */
88 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
95 /* Unmarks modified registers; helper to blocks_invariant_registers. */
97 unmark_altered (rtx what
, rtx by ATTRIBUTE_UNUSED
, regset regs
)
99 if (GET_CODE (what
) == SUBREG
)
100 what
= SUBREG_REG (what
);
103 CLEAR_REGNO_REG_SET (regs
, REGNO (what
));
106 /* Marks registers that are invariant inside blocks BBS. */
108 blocks_invariant_registers (basic_block
*bbs
, int nbbs
, regset regs
)
113 for (i
= 0; i
< max_reg_num (); i
++)
114 SET_REGNO_REG_SET (regs
, i
);
115 for (i
= 0; i
< nbbs
; i
++)
116 for (insn
= BB_HEAD (bbs
[i
]);
117 insn
!= NEXT_INSN (BB_END (bbs
[i
]));
118 insn
= NEXT_INSN (insn
))
120 note_stores (PATTERN (insn
),
121 (void (*) (rtx
, rtx
, void *)) unmark_altered
,
125 /* Unmarks modified registers; helper to blocks_single_set_registers. */
126 struct unmark_altered_insn_data
133 unmark_altered_insn (rtx what
, rtx by ATTRIBUTE_UNUSED
,
134 struct unmark_altered_insn_data
*data
)
138 if (GET_CODE (what
) == SUBREG
)
139 what
= SUBREG_REG (what
);
143 if (data
->regs
[rn
] == data
->insn
)
145 data
->regs
[rn
] = NULL
;
148 /* Marks registers that have just single simple set in BBS; the relevant
149 insn is returned in REGS. */
151 blocks_single_set_registers (basic_block
*bbs
, int nbbs
, rtx
*regs
)
155 struct unmark_altered_insn_data data
;
157 for (i
= 0; i
< max_reg_num (); i
++)
160 for (i
= 0; i
< nbbs
; i
++)
161 for (insn
= BB_HEAD (bbs
[i
]);
162 insn
!= NEXT_INSN (BB_END (bbs
[i
]));
163 insn
= NEXT_INSN (insn
))
165 rtx set
= single_set (insn
);
167 if (!set
&& is_bct_cond (insn
))
168 set
= get_var_set_from_bct(insn
);
172 if (!REG_P (SET_DEST (set
)))
174 regs
[REGNO (SET_DEST (set
))] = insn
;
178 for (i
= 0; i
< nbbs
; i
++)
179 for (insn
= BB_HEAD (bbs
[i
]);
180 insn
!= NEXT_INSN (BB_END (bbs
[i
]));
181 insn
= NEXT_INSN (insn
))
186 note_stores (PATTERN (insn
),
187 (void (*) (rtx
, rtx
, void *)) unmark_altered_insn
,
192 /* Helper for invariant_rtx_wrto_regs_p. */
194 invariant_rtx_wrto_regs_p_helper (rtx
*expr
, regset invariant_regs
)
196 switch (GET_CODE (*expr
))
200 case UNSPEC_VOLATILE
:
211 return MEM_VOLATILE_P (*expr
);
214 /* If the memory is not constant, assume it is modified. If it is
215 constant, we still have to check the address. */
216 return !RTX_UNCHANGING_P (*expr
);
219 return !REGNO_REG_SET_P (invariant_regs
, REGNO (*expr
));
226 /* Checks that EXPR is invariant provided that INVARIANT_REGS are invariant. */
228 invariant_rtx_wrto_regs_p (rtx expr
, regset invariant_regs
)
230 return !for_each_rtx (&expr
, (rtx_function
) invariant_rtx_wrto_regs_p_helper
,
234 /* Checks whether CONDITION is a simple comparison in that one of operands
235 is register and the other one is invariant in the LOOP. Fills var, lim
236 and cond fields in DESC. */
238 simple_condition_p (struct loop
*loop ATTRIBUTE_UNUSED
, rtx condition
,
239 regset invariant_regs
, struct loop_desc
*desc
)
243 /* Check condition. */
244 switch (GET_CODE (condition
))
261 /* Of integers or pointers. */
262 if (GET_MODE_CLASS (GET_MODE (XEXP (condition
, 0))) != MODE_INT
263 && GET_MODE_CLASS (GET_MODE (XEXP (condition
, 0))) != MODE_PARTIAL_INT
)
266 /* One of operands must be a simple register. */
267 op0
= XEXP (condition
, 0);
268 op1
= XEXP (condition
, 1);
270 /* One of operands must be invariant. */
271 if (invariant_rtx_wrto_regs_p (op0
, invariant_regs
))
273 /* And the other one must be a register. */
279 desc
->cond
= swap_condition (GET_CODE (condition
));
280 if (desc
->cond
== UNKNOWN
)
285 /* Check the other operand. */
286 if (!invariant_rtx_wrto_regs_p (op1
, invariant_regs
))
294 desc
->cond
= GET_CODE (condition
);
299 /* Checks whether DESC->var is incremented/decremented exactly once each
300 iteration. Fills in DESC->stride and returns block in that DESC->var is
303 simple_increment (struct loop
*loop
, rtx
*simple_increment_regs
,
304 struct loop_desc
*desc
)
306 rtx mod_insn
, mod_insn1
, set
, set_src
, set_add
;
307 basic_block mod_bb
, mod_bb1
;
309 /* Find insn that modifies var. */
310 mod_insn
= simple_increment_regs
[REGNO (desc
->var
)];
313 mod_bb
= BLOCK_FOR_INSN (mod_insn
);
315 /* Check that it is executed exactly once each iteration. */
316 if (!just_once_each_iteration_p (loop
, mod_bb
))
319 /* mod_insn must be a simple increment/decrement. */
320 set
= single_set (mod_insn
);
322 if (!set
&& is_bct_cond (mod_insn
))
323 set
= get_var_set_from_bct(mod_insn
);
327 if (!rtx_equal_p (SET_DEST (set
), desc
->var
))
330 set_src
= find_reg_equal_equiv_note (mod_insn
);
332 set_src
= SET_SRC (set
);
334 /* Check for variables that iterate in narrower mode. */
335 if (GET_CODE (set_src
) == SIGN_EXTEND
336 || GET_CODE (set_src
) == ZERO_EXTEND
)
338 /* If we are sign extending variable that is then compared unsigned
339 or vice versa, there is something weird happening. */
342 && ((desc
->cond
== LEU
345 || desc
->cond
== GTU
)
346 ^ (GET_CODE (set_src
) == ZERO_EXTEND
)))
349 if (GET_CODE (XEXP (set_src
, 0)) != SUBREG
350 || SUBREG_BYTE (XEXP (set_src
, 0)) != 0
351 || GET_MODE (SUBREG_REG (XEXP (set_src
, 0))) != GET_MODE (desc
->var
))
354 desc
->inner_mode
= GET_MODE (XEXP (set_src
, 0));
355 desc
->extend
= GET_CODE (set_src
);
356 set_src
= SUBREG_REG (XEXP (set_src
, 0));
358 if (GET_CODE (set_src
) != REG
)
361 /* Find where the reg is set. */
362 mod_insn1
= simple_increment_regs
[REGNO (set_src
)];
366 mod_bb1
= BLOCK_FOR_INSN (mod_insn1
);
367 if (!dominated_by_p (CDI_DOMINATORS
, mod_bb
, mod_bb1
))
369 if (mod_bb1
== mod_bb
)
372 mod_insn
!= PREV_INSN (BB_HEAD (mod_bb
));
373 mod_insn
= PREV_INSN (mod_insn
))
374 if (mod_insn
== mod_insn1
)
377 if (mod_insn
== PREV_INSN (BB_HEAD (mod_bb
)))
381 /* Replace the source with the possible place of increment. */
382 set
= single_set (mod_insn1
);
385 if (!rtx_equal_p (SET_DEST (set
), set_src
))
388 set_src
= find_reg_equal_equiv_note (mod_insn1
);
390 set_src
= SET_SRC (set
);
394 desc
->inner_mode
= GET_MODE (desc
->var
);
398 if (GET_CODE (set_src
) != PLUS
)
400 if (!rtx_equal_p (XEXP (set_src
, 0), desc
->var
))
403 /* Set desc->stride. */
404 set_add
= XEXP (set_src
, 1);
405 if (CONSTANT_P (set_add
))
406 desc
->stride
= set_add
;
413 /* Tries to find initial value of VAR in INSN. This value must be invariant
414 wrto INVARIANT_REGS. If SET_INSN is not NULL, insn in that var is set is
415 placed here. INNER_MODE is mode in that induction variable VAR iterates. */
417 variable_initial_value (rtx insn
, regset invariant_regs
,
418 rtx var
, rtx
*set_insn
, enum machine_mode inner_mode
)
424 /* Go back through cfg. */
425 bb
= BLOCK_FOR_INSN (insn
);
428 for (; insn
!= BB_HEAD (bb
); insn
= PREV_INSN (insn
))
431 note_stores (PATTERN (insn
),
432 (void (*) (rtx
, rtx
, void *)) unmark_altered
,
434 if (modified_between_p (var
, PREV_INSN (insn
), NEXT_INSN (insn
)))
438 if (insn
!= BB_HEAD (bb
))
440 /* We found place where var is set. */
445 set
= single_set (insn
);
448 set_dest
= SET_DEST (set
);
449 if (!rtx_equal_p (set_dest
, var
))
452 note
= find_reg_equal_equiv_note (insn
);
453 if (note
&& GET_CODE (XEXP (note
, 0)) != EXPR_LIST
)
454 val
= XEXP (note
, 0);
458 /* If we know that the initial value is indeed in range of
459 the inner mode, record the fact even in case the value itself
461 if ((GET_CODE (val
) == SIGN_EXTEND
462 || GET_CODE (val
) == ZERO_EXTEND
)
463 && GET_MODE (XEXP (val
, 0)) == inner_mode
)
464 ret
= gen_rtx_fmt_e (GET_CODE (val
),
466 gen_rtx_fmt_ei (SUBREG
,
470 if (!invariant_rtx_wrto_regs_p (val
, invariant_regs
))
479 if (bb
->pred
->pred_next
|| bb
->pred
->src
== ENTRY_BLOCK_PTR
)
489 /* Returns list of definitions of initial value of VAR at edge E. INNER_MODE
490 is mode in that induction variable VAR really iterates. */
492 variable_initial_values (edge e
, rtx var
, enum machine_mode inner_mode
)
495 regset invariant_regs
;
496 regset_head invariant_regs_head
;
499 invariant_regs
= INITIALIZE_REG_SET (invariant_regs_head
);
500 for (i
= 0; i
< max_reg_num (); i
++)
501 SET_REGNO_REG_SET (invariant_regs
, i
);
503 list
= alloc_EXPR_LIST (0, copy_rtx (var
), NULL
);
505 if (e
->src
== ENTRY_BLOCK_PTR
)
508 set_insn
= BB_END (e
->src
);
510 && (var
= variable_initial_value (set_insn
, invariant_regs
, var
,
511 &set_insn
, inner_mode
)))
512 list
= alloc_EXPR_LIST (0, copy_rtx (var
), list
);
514 FREE_REG_SET (invariant_regs
);
518 /* Counts constant number of iterations of the loop described by DESC;
519 returns false if impossible. */
521 constant_iterations (struct loop_desc
*desc
, unsigned HOST_WIDE_INT
*niter
,
527 test
= test_for_iteration (desc
, 0);
528 if (test
== const0_rtx
)
531 *may_be_zero
= false;
535 *may_be_zero
= (test
!= const_true_rtx
);
537 /* It would make a little sense to check every with every when we
538 know that all but the first alternative are simply registers. */
539 for (ainit
= desc
->var_alts
; ainit
; ainit
= XEXP (ainit
, 1))
541 alim
= XEXP (desc
->lim_alts
, 0);
542 if (!(expr
= count_loop_iterations (desc
, XEXP (ainit
, 0), alim
)))
544 if (GET_CODE (expr
) == CONST_INT
)
546 *niter
= INTVAL (expr
);
550 for (alim
= XEXP (desc
->lim_alts
, 1); alim
; alim
= XEXP (alim
, 1))
552 ainit
= XEXP (desc
->var_alts
, 0);
553 if (!(expr
= count_loop_iterations (desc
, ainit
, XEXP (alim
, 0))))
555 if (GET_CODE (expr
) == CONST_INT
)
557 *niter
= INTVAL (expr
);
565 /* Attempts to determine a number of iterations of a "strange" loop.
566 Its induction variable starts with value INIT, is compared by COND
567 with LIM. If POSTINCR, it is incremented after the test. It is incremented
568 by STRIDE each iteration, has mode MODE but iterates in INNER_MODE.
570 By "strange" we mean loops where induction variable increases in the wrong
571 direction wrto comparison, i.e. for (i = 6; i > 5; i++). */
573 count_strange_loop_iterations (rtx init
, rtx lim
, enum rtx_code cond
,
574 int postincr
, rtx stride
, enum machine_mode mode
,
575 enum machine_mode inner_mode
)
577 rtx rqmt
, n_to_wrap
, before_wrap
, after_wrap
;
578 rtx mode_min
, mode_max
;
581 /* This could be handled, but it is not important enough to lose time with
583 if (mode
!= inner_mode
)
587 init
= simplify_gen_binary (PLUS
, mode
, init
, stride
);
589 /* If we are able to prove that we don't pass the first test, we are
591 rqmt
= simplify_relational_operation (cond
, mode
, init
, lim
);
592 if (rqmt
== const0_rtx
)
595 /* And if we don't know we pass it, the things are too complicated for us. */
596 if (rqmt
!= const_true_rtx
)
605 size
= GET_MODE_BITSIZE (mode
);
606 mode_min
= gen_int_mode (-((unsigned HOST_WIDEST_INT
) 1 << (size
- 1)),
608 mode_max
= gen_int_mode (((unsigned HOST_WIDEST_INT
) 1 << (size
- 1)) - 1,
618 mode_min
= const0_rtx
;
619 mode_max
= simplify_gen_binary (MINUS
, mode
, const0_rtx
, const1_rtx
);
629 /* This iterates once, as init == lim. */
632 /* The behavior is undefined in signed cases. Never mind, we still
633 try to behave sanely. */
638 if (INTVAL (stride
) <= 0)
640 n_to_wrap
= simplify_gen_binary (MINUS
, mode
, mode_max
, copy_rtx (init
));
641 n_to_wrap
= simplify_gen_binary (UDIV
, mode
, n_to_wrap
, stride
);
642 before_wrap
= simplify_gen_binary (MULT
, mode
,
643 copy_rtx (n_to_wrap
), stride
);
644 before_wrap
= simplify_gen_binary (PLUS
, mode
,
645 before_wrap
, copy_rtx (init
));
646 after_wrap
= simplify_gen_binary (PLUS
, mode
,
647 before_wrap
, stride
);
648 if (GET_CODE (after_wrap
) != CONST_INT
)
650 after_wrap
= simplify_gen_binary (PLUS
, mode
, mode_min
, stride
);
651 after_wrap
= simplify_gen_binary (MINUS
, mode
, after_wrap
, const1_rtx
);
659 if (INTVAL (stride
) >= 0)
661 stride
= simplify_gen_unary (NEG
, mode
, stride
, mode
);
662 n_to_wrap
= simplify_gen_binary (MINUS
, mode
, copy_rtx (init
), mode_min
);
663 n_to_wrap
= simplify_gen_binary (UDIV
, mode
, n_to_wrap
, stride
);
664 before_wrap
= simplify_gen_binary (MULT
, mode
,
665 copy_rtx (n_to_wrap
), stride
);
666 before_wrap
= simplify_gen_binary (MINUS
, mode
,
667 copy_rtx (init
), before_wrap
);
668 after_wrap
= simplify_gen_binary (MINUS
, mode
,
669 before_wrap
, stride
);
670 if (GET_CODE (after_wrap
) != CONST_INT
)
672 after_wrap
= simplify_gen_binary (MINUS
, mode
, mode_max
, stride
);
673 after_wrap
= simplify_gen_binary (PLUS
, mode
, after_wrap
, const1_rtx
);
680 /* If this is const_true_rtx and we did not take a conservative approximation
681 of after_wrap above, we might iterate the calculation (but of course we
682 would have to take care about infinite cases). Ignore this for now. */
683 rqmt
= simplify_relational_operation (cond
, mode
, after_wrap
, lim
);
684 if (rqmt
!= const0_rtx
)
687 return simplify_gen_binary (PLUS
, mode
, n_to_wrap
, const1_rtx
);
690 /* Checks whether value of EXPR fits into range of MODE. */
692 fits_in_mode_p (enum machine_mode mode
, rtx expr
)
694 unsigned HOST_WIDEST_INT val
;
697 if (GET_CODE (expr
) == CONST_INT
)
699 for (val
= INTVAL (expr
); val
; val
>>= 1)
702 return n_bits
<= GET_MODE_BITSIZE (mode
);
705 if (GET_CODE (expr
) == SIGN_EXTEND
706 || GET_CODE (expr
) == ZERO_EXTEND
)
707 return GET_MODE (XEXP (expr
, 0)) == mode
;
712 /* Return RTX expression representing number of iterations of loop as bounded
713 by test described by DESC (in the case loop really has multiple exit
714 edges, fewer iterations may happen in the practice).
716 Return NULL if it is unknown. Additionally the value may be invalid for
717 paradoxical loop (lets define paradoxical loops as loops whose test is
718 failing at -1th iteration, for instance "for (i=5;i<1;i++);").
720 These cases needs to be either cared by copying the loop test in the front
721 of loop or keeping the test in first iteration of loop.
723 When INIT/LIM are set, they are used instead of var/lim of DESC. */
725 count_loop_iterations (struct loop_desc
*desc
, rtx init
, rtx lim
)
727 enum rtx_code cond
= desc
->cond
;
728 rtx stride
= desc
->stride
;
729 rtx mod
, exp
, ainit
, bound
;
730 rtx overflow_check
, mx
, mxp
;
731 enum machine_mode mode
= GET_MODE (desc
->var
);
732 unsigned HOST_WIDEST_INT s
, size
, d
;
734 /* Give up on floating point modes and friends. It can be possible to do
735 the job for constant loop bounds, but it is probably not worthwhile. */
736 if (!INTEGRAL_MODE_P (mode
))
739 init
= copy_rtx (init
? init
: desc
->var
);
740 lim
= copy_rtx (lim
? lim
: desc
->lim
);
742 /* Ensure that we always handle the condition to stay inside loop. */
744 cond
= reverse_condition (cond
);
746 if (desc
->inner_mode
!= mode
)
748 /* We have a case when the variable in fact iterates in the narrower
749 mode. This has following consequences:
751 For induction variable itself, if !desc->postincr, it does not mean
752 anything too special, since we know the variable is already in range
753 of the inner mode when we compare it (so it is just needed to shorten
754 it into the mode before calculations are done, so that we don't risk
755 wrong results). More complicated case is when desc->postincr; then
756 the first two iterations are special (the first one because the value
757 may be out of range, the second one because after shortening it to the
758 range it may have absolutely any value), and we do not handle this in
759 unrolling. So if we aren't able to prove that the initial value is in
760 the range, we fail in this case.
762 Step is just moduled to fit into inner mode.
764 If lim is out of range, then either the loop is infinite (and then
765 we may unroll however we like to), or exits in the first iteration
766 (this is also ok, since we handle it specially for this case anyway).
767 So we may safely assume that it fits into the inner mode. */
769 for (ainit
= desc
->var_alts
; ainit
; ainit
= XEXP (ainit
, 1))
770 if (fits_in_mode_p (desc
->inner_mode
, XEXP (ainit
, 0)))
778 init
= simplify_gen_unary (desc
->extend
,
780 simplify_gen_subreg (desc
->inner_mode
,
787 stride
= simplify_gen_subreg (desc
->inner_mode
, stride
, mode
, 0);
788 if (stride
== const0_rtx
)
792 /* Prepare condition to verify that we do not risk overflow. */
793 if (stride
== const1_rtx
794 || stride
== constm1_rtx
798 /* Overflow at NE conditions does not occur. EQ condition
799 is weird and is handled in count_strange_loop_iterations.
800 If stride is 1, overflow may occur only for <= and >= conditions,
801 and then they are infinite, so it does not bother us. */
802 overflow_check
= const0_rtx
;
806 if (cond
== LT
|| cond
== LTU
)
807 mx
= simplify_gen_binary (MINUS
, mode
, lim
, const1_rtx
);
808 else if (cond
== GT
|| cond
== GTU
)
809 mx
= simplify_gen_binary (PLUS
, mode
, lim
, const1_rtx
);
812 if (mode
!= desc
->inner_mode
)
813 mxp
= simplify_gen_subreg (desc
->inner_mode
, mx
, mode
, 0);
816 mxp
= simplify_gen_binary (PLUS
, desc
->inner_mode
, mxp
, stride
);
817 if (mode
!= desc
->inner_mode
)
818 mxp
= simplify_gen_unary (desc
->extend
, mode
, mxp
, desc
->inner_mode
);
819 overflow_check
= simplify_gen_relational (cond
, SImode
, mode
, mx
, mxp
);
822 /* Compute absolute value of the difference of initial and final value. */
823 if (INTVAL (stride
) > 0)
825 /* Handle strange tests specially. */
826 if (cond
== EQ
|| cond
== GE
|| cond
== GT
|| cond
== GEU
828 return count_strange_loop_iterations (init
, lim
, cond
, desc
->postincr
,
829 stride
, mode
, desc
->inner_mode
);
830 exp
= simplify_gen_binary (MINUS
, mode
, lim
, init
);
834 if (cond
== EQ
|| cond
== LE
|| cond
== LT
|| cond
== LEU
836 return count_strange_loop_iterations (init
, lim
, cond
, desc
->postincr
,
837 stride
, mode
, desc
->inner_mode
);
838 exp
= simplify_gen_binary (MINUS
, mode
, init
, lim
);
839 stride
= simplify_gen_unary (NEG
, mode
, stride
, mode
);
842 /* If there is a risk of overflow (i.e. when we increment value satisfying
843 a condition, we may again obtain a value satisfying the condition),
845 if (overflow_check
!= const0_rtx
)
848 /* Normalize difference so the value is always first examined
849 and later incremented. Do not do this for a loop ending with a branch
850 and count register. */
851 if (!is_bct_cond (BB_END (desc
->out_edge
->src
)) && (!desc
->postincr
))
852 exp
= simplify_gen_binary (MINUS
, mode
, exp
, stride
);
854 /* Determine delta caused by exit condition. */
858 /* NE tests are easy to handle, because we just perform simple
859 arithmetics modulo power of 2. Let's use the fact to compute the
860 number of iterations exactly. We are now in situation when we want to
861 solve an equation stride * i = c (mod size of inner_mode).
862 Let nsd (stride, size of mode) = d. If d does not divide c, the
863 loop is infinite. Otherwise, the number of iterations is
864 (inverse(s/d) * (c/d)) mod (size of mode/d). */
865 size
= GET_MODE_BITSIZE (desc
->inner_mode
);
874 bound
= gen_int_mode (((unsigned HOST_WIDEST_INT
) 1 << (size
- 1 ) << 1) - 1,
876 exp
= simplify_gen_binary (UDIV
, mode
, exp
, gen_int_mode (d
, mode
));
877 exp
= simplify_gen_binary (MULT
, mode
,
878 exp
, gen_int_mode (inverse (s
, size
), mode
));
879 exp
= simplify_gen_binary (AND
, mode
, exp
, bound
);
891 exp
= simplify_gen_binary (PLUS
, mode
, exp
, const1_rtx
);
897 if (cond
!= NE
&& stride
!= const1_rtx
)
899 /* Number of iterations is now (EXP + STRIDE - 1 / STRIDE),
900 but we need to take care for overflows. */
902 mod
= simplify_gen_binary (UMOD
, mode
, exp
, stride
);
904 /* This is dirty trick. When we can't compute number of iterations
905 to be constant, we simply ignore the possible overflow, as
906 runtime unroller always use power of 2 amounts and does not
907 care about possible lost bits. */
909 if (GET_CODE (mod
) != CONST_INT
)
911 rtx stridem1
= simplify_gen_binary (PLUS
, mode
, stride
, constm1_rtx
);
912 exp
= simplify_gen_binary (PLUS
, mode
, exp
, stridem1
);
913 exp
= simplify_gen_binary (UDIV
, mode
, exp
, stride
);
917 exp
= simplify_gen_binary (UDIV
, mode
, exp
, stride
);
918 if (mod
!= const0_rtx
)
919 exp
= simplify_gen_binary (PLUS
, mode
, exp
, const1_rtx
);
925 fprintf (rtl_dump_file
, "; Number of iterations: ");
926 print_simple_rtl (rtl_dump_file
, exp
);
927 fprintf (rtl_dump_file
, "\n");
933 /* Return simplified RTX expression representing the value of test
934 described of DESC at given iteration of loop. */
937 test_for_iteration (struct loop_desc
*desc
, unsigned HOST_WIDE_INT iter
)
939 enum rtx_code cond
= desc
->cond
;
940 rtx exp
= XEXP (desc
->var_alts
, 0);
943 /* Give up on floating point modes and friends. It can be possible to do
944 the job for constant loop bounds, but it is probably not worthwhile. */
945 if (!INTEGRAL_MODE_P (GET_MODE (desc
->var
)))
948 /* Ensure that we always handle the condition to stay inside loop. */
950 cond
= reverse_condition (cond
);
952 /* Compute the value of induction variable. */
953 addval
= simplify_gen_binary (MULT
, GET_MODE (desc
->var
),
955 gen_int_mode (desc
->postincr
957 GET_MODE (desc
->var
)));
958 exp
= simplify_gen_binary (PLUS
, GET_MODE (desc
->var
), exp
, addval
);
959 /* Test at given condition. */
960 exp
= simplify_gen_relational (cond
, SImode
,
961 GET_MODE (desc
->var
), exp
, desc
->lim
);
965 fprintf (rtl_dump_file
, "; Conditional to continue loop at "
966 HOST_WIDE_INT_PRINT_UNSIGNED
"th iteration: ", iter
);
967 print_simple_rtl (rtl_dump_file
, exp
);
968 fprintf (rtl_dump_file
, "\n");
974 /* Tests whether exit at EXIT_EDGE from LOOP is simple. Returns simple loop
975 description joined to it in in DESC. INVARIANT_REGS and SINGLE_SET_REGS
976 are results of blocks_{invariant,single_set}_regs over BODY. */
978 simple_loop_exit_p (struct loop
*loop
, edge exit_edge
,
979 regset invariant_regs
, rtx
*single_set_regs
,
980 struct loop_desc
*desc
)
982 basic_block mod_bb
, exit_bb
;
987 exit_bb
= exit_edge
->src
;
989 fallthru_out
= (exit_edge
->flags
& EDGE_FALLTHRU
);
994 /* It must be tested (at least) once during any iteration. */
995 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, exit_bb
))
998 /* It must end in a simple conditional jump. */
999 if (!any_condjump_p (BB_END (exit_bb
)))
1003 if (ei
== exit_edge
)
1006 desc
->out_edge
= exit_edge
;
1009 /* Condition must be a simple comparison in that one of operands
1010 is register and the other one is invariant. */
1011 if (!(condition
= get_condition (BB_END (exit_bb
), NULL
, false)))
1014 if (!simple_condition_p (loop
, condition
, invariant_regs
, desc
))
1017 /* Var must be simply incremented or decremented in exactly one insn that
1018 is executed just once every iteration. */
1019 if (!(mod_bb
= simple_increment (loop
, single_set_regs
, desc
)))
1022 /* OK, it is simple loop. Now just fill in remaining info. */
1023 desc
->postincr
= !dominated_by_p (CDI_DOMINATORS
, exit_bb
, mod_bb
);
1024 desc
->neg
= !fallthru_out
;
1026 /* Find initial value of var and alternative values for lim. */
1027 e
= loop_preheader_edge (loop
);
1028 desc
->var_alts
= variable_initial_values (e
, desc
->var
, desc
->inner_mode
);
1029 desc
->lim_alts
= variable_initial_values (e
, desc
->lim
, desc
->inner_mode
);
1031 /* Number of iterations. */
1033 constant_iterations (desc
, &desc
->niter
, &desc
->may_be_zero
);
1034 if (!desc
->const_iter
&& !count_loop_iterations (desc
, NULL
, NULL
))
1039 /* Tests whether LOOP is simple for loop. Returns simple loop description
1042 simple_loop_p (struct loop
*loop
, struct loop_desc
*desc
)
1047 struct loop_desc act
;
1049 regset invariant_regs
;
1050 regset_head invariant_regs_head
;
1051 rtx
*single_set_regs
;
1054 body
= get_loop_body (loop
);
1056 invariant_regs
= INITIALIZE_REG_SET (invariant_regs_head
);
1057 single_set_regs
= xmalloc (max_reg_num () * sizeof (rtx
));
1059 blocks_invariant_registers (body
, loop
->num_nodes
, invariant_regs
);
1060 blocks_single_set_registers (body
, loop
->num_nodes
, single_set_regs
);
1063 for (i
= 0; i
< loop
->num_nodes
; i
++)
1065 for (e
= body
[i
]->succ
; e
; e
= e
->succ_next
)
1066 if (!flow_bb_inside_loop_p (loop
, e
->dest
)
1067 && simple_loop_exit_p (loop
, e
,
1068 invariant_regs
, single_set_regs
, &act
))
1070 /* Prefer constant iterations; the less the better. */
1073 else if (!act
.const_iter
1074 || (desc
->const_iter
&& act
.niter
>= desc
->niter
))
1079 if (body
[i
]->succ
&& body
[i
]->succ
->succ_next
)
1082 desc
->n_branches
= n_branches
;
1084 if (rtl_dump_file
&& any
)
1086 fprintf (rtl_dump_file
, "; Simple loop %i\n", loop
->num
);
1088 fprintf (rtl_dump_file
,
1089 "; does postincrement after loop exit condition\n");
1091 fprintf (rtl_dump_file
, "; Induction variable:");
1092 print_simple_rtl (rtl_dump_file
, desc
->var
);
1093 fputc ('\n', rtl_dump_file
);
1095 fprintf (rtl_dump_file
, "; Initial values:");
1096 print_simple_rtl (rtl_dump_file
, desc
->var_alts
);
1097 fputc ('\n', rtl_dump_file
);
1099 fprintf (rtl_dump_file
, "; Stride:");
1100 print_simple_rtl (rtl_dump_file
, desc
->stride
);
1101 fputc ('\n', rtl_dump_file
);
1103 fprintf (rtl_dump_file
, "; Compared with:");
1104 print_simple_rtl (rtl_dump_file
, desc
->lim
);
1105 fputc ('\n', rtl_dump_file
);
1107 fprintf (rtl_dump_file
, "; Alternative values:");
1108 print_simple_rtl (rtl_dump_file
, desc
->lim_alts
);
1109 fputc ('\n', rtl_dump_file
);
1111 fprintf (rtl_dump_file
, "; Exit condition:");
1113 fprintf (rtl_dump_file
, "(negated)");
1114 fprintf (rtl_dump_file
, "%s\n", GET_RTX_NAME (desc
->cond
));
1116 fprintf (rtl_dump_file
, "; Number of branches:");
1117 fprintf (rtl_dump_file
, "%d\n", desc
->n_branches
);
1119 fputc ('\n', rtl_dump_file
);
1123 FREE_REG_SET (invariant_regs
);
1124 free (single_set_regs
);
1128 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
1129 throw away all latch edges and mark blocks inside any remaining cycle.
1130 Everything is a bit complicated due to fact we do not want to do this
1131 for parts of cycles that only "pass" through some loop -- i.e. for
1132 each cycle, we want to mark blocks that belong directly to innermost
1133 loop containing the whole cycle. */
1135 mark_irreducible_loops (struct loops
*loops
)
1137 int *dfs_in
, *closed
, *mr
, *mri
, *n_edges
, *stack
;
1142 int stack_top
, tick
, depth
;
1145 /* Reset the flags. */
1146 FOR_BB_BETWEEN (act
, ENTRY_BLOCK_PTR
, EXIT_BLOCK_PTR
, next_bb
)
1148 act
->flags
&= ~BB_IRREDUCIBLE_LOOP
;
1149 for (e
= act
->succ
; e
; e
= e
->succ_next
)
1150 e
->flags
&= ~EDGE_IRREDUCIBLE_LOOP
;
1153 /* The first last_basic_block + 1 entries are for real blocks (including
1154 entry); then we have loops->num - 1 fake blocks for loops to that we
1155 assign edges leading from loops (fake loop 0 is not interesting). */
1156 dfs_in
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (int));
1157 closed
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (int));
1158 mr
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (int));
1159 mri
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (int));
1160 n_edges
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (int));
1161 edges
= xmalloc ((last_basic_block
+ loops
->num
) * sizeof (edge
*));
1162 stack
= xmalloc ((n_basic_blocks
+ loops
->num
) * sizeof (int));
1163 estack
= xmalloc ((n_basic_blocks
+ loops
->num
) * sizeof (edge
));
1165 /* Create the edge lists. */
1166 for (i
= 0; i
< last_basic_block
+ loops
->num
; i
++)
1168 FOR_BB_BETWEEN (act
, ENTRY_BLOCK_PTR
, EXIT_BLOCK_PTR
, next_bb
)
1169 for (e
= act
->succ
; e
; e
= e
->succ_next
)
1171 /* Ignore edges to exit. */
1172 if (e
->dest
== EXIT_BLOCK_PTR
)
1174 /* And latch edges. */
1175 if (e
->dest
->loop_father
->header
== e
->dest
1176 && e
->dest
->loop_father
->latch
== act
)
1178 /* Edges inside a single loop should be left where they are. Edges
1179 to subloop headers should lead to representative of the subloop,
1180 but from the same place. */
1181 if (act
->loop_father
== e
->dest
->loop_father
1182 || act
->loop_father
== e
->dest
->loop_father
->outer
)
1184 n_edges
[act
->index
+ 1]++;
1187 /* Edges exiting loops remain. They should lead from representative
1188 of the son of nearest common ancestor of the loops in that
1190 depth
= find_common_loop (act
->loop_father
, e
->dest
->loop_father
)->depth
+ 1;
1191 if (depth
== act
->loop_father
->depth
)
1192 cloop
= act
->loop_father
;
1194 cloop
= act
->loop_father
->pred
[depth
];
1195 n_edges
[cloop
->num
+ last_basic_block
]++;
1198 for (i
= 0; i
< last_basic_block
+ loops
->num
; i
++)
1200 edges
[i
] = xmalloc (n_edges
[i
] * sizeof (edge
));
1204 FOR_BB_BETWEEN (act
, ENTRY_BLOCK_PTR
, EXIT_BLOCK_PTR
, next_bb
)
1205 for (e
= act
->succ
; e
; e
= e
->succ_next
)
1207 if (e
->dest
== EXIT_BLOCK_PTR
)
1209 if (e
->dest
->loop_father
->header
== e
->dest
1210 && e
->dest
->loop_father
->latch
== act
)
1212 if (act
->loop_father
== e
->dest
->loop_father
1213 || act
->loop_father
== e
->dest
->loop_father
->outer
)
1215 edges
[act
->index
+ 1][n_edges
[act
->index
+ 1]++] = e
;
1218 depth
= find_common_loop (act
->loop_father
, e
->dest
->loop_father
)->depth
+ 1;
1219 if (depth
== act
->loop_father
->depth
)
1220 cloop
= act
->loop_father
;
1222 cloop
= act
->loop_father
->pred
[depth
];
1223 i
= cloop
->num
+ last_basic_block
;
1224 edges
[i
][n_edges
[i
]++] = e
;
1227 /* Compute dfs numbering, starting from loop headers, and mark found
1230 for (i
= 0; i
< last_basic_block
+ loops
->num
; i
++)
1234 mr
[i
] = last_basic_block
+ loops
->num
;
1239 for (i
= 0; i
< loops
->num
; i
++)
1240 if (loops
->parray
[i
])
1242 stack
[stack_top
] = loops
->parray
[i
]->header
->index
+ 1;
1243 estack
[stack_top
] = NULL
;
1251 idx
= stack
[stack_top
- 1];
1252 if (dfs_in
[idx
] < 0)
1253 dfs_in
[idx
] = tick
++;
1255 while (n_edges
[idx
])
1257 e
= edges
[idx
][--n_edges
[idx
]];
1258 sidx
= e
->dest
->loop_father
->header
== e
->dest
1259 ? e
->dest
->loop_father
->num
+ last_basic_block
1260 : e
->dest
->index
+ 1;
1263 if (mri
[sidx
] != -1 && !closed
[mri
[sidx
]])
1265 if (mr
[sidx
] < mr
[idx
])
1268 mri
[idx
] = mri
[sidx
];
1271 if (mr
[sidx
] <= dfs_in
[idx
])
1272 e
->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1276 if (dfs_in
[sidx
] < 0)
1278 stack
[stack_top
] = sidx
;
1279 estack
[stack_top
] = e
;
1283 if (dfs_in
[sidx
] < mr
[idx
])
1285 mr
[idx
] = dfs_in
[sidx
];
1288 e
->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1293 e
= estack
[stack_top
- 1];
1297 /* Propagate information back. */
1298 sidx
= stack
[stack_top
- 1];
1299 if (mr
[sidx
] > mr
[idx
])
1302 mri
[sidx
] = mri
[idx
];
1304 if (mr
[idx
] <= dfs_in
[sidx
])
1305 e
->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1307 /* Mark the block if relevant. */
1308 if (idx
&& idx
<= last_basic_block
&& mr
[idx
] <= dfs_in
[idx
])
1309 BASIC_BLOCK (idx
- 1)->flags
|= BB_IRREDUCIBLE_LOOP
;
1319 for (i
= 0; i
< last_basic_block
+ loops
->num
; i
++)
1323 loops
->state
|= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
;
1326 /* Counts number of insns inside LOOP. */
1328 num_loop_insns (struct loop
*loop
)
1330 basic_block
*bbs
, bb
;
1331 unsigned i
, ninsns
= 0;
1334 bbs
= get_loop_body (loop
);
1335 for (i
= 0; i
< loop
->num_nodes
; i
++)
1339 for (insn
= BB_HEAD (bb
); insn
!= BB_END (bb
); insn
= NEXT_INSN (insn
))
1348 /* Counts number of insns executed on average per iteration LOOP. */
1350 average_num_loop_insns (struct loop
*loop
)
1352 basic_block
*bbs
, bb
;
1353 unsigned i
, binsns
, ninsns
, ratio
;
1357 bbs
= get_loop_body (loop
);
1358 for (i
= 0; i
< loop
->num_nodes
; i
++)
1363 for (insn
= BB_HEAD (bb
); insn
!= BB_END (bb
); insn
= NEXT_INSN (insn
))
1367 ratio
= loop
->header
->frequency
== 0
1369 : (bb
->frequency
* BB_FREQ_MAX
) / loop
->header
->frequency
;
1370 ninsns
+= binsns
* ratio
;
1374 ninsns
/= BB_FREQ_MAX
;
1376 ninsns
= 1; /* To avoid division by zero. */
1381 /* Returns expected number of LOOP iterations.
1382 Compute upper bound on number of iterations in case they do not fit integer
1383 to help loop peeling heuristics. Use exact counts if at all possible. */
1385 expected_loop_iterations (const struct loop
*loop
)
1389 if (loop
->header
->count
)
1391 gcov_type count_in
, count_latch
, expected
;
1396 for (e
= loop
->header
->pred
; e
; e
= e
->pred_next
)
1397 if (e
->src
== loop
->latch
)
1398 count_latch
= e
->count
;
1400 count_in
+= e
->count
;
1405 expected
= (count_latch
+ count_in
- 1) / count_in
;
1407 /* Avoid overflows. */
1408 return (expected
> REG_BR_PROB_BASE
? REG_BR_PROB_BASE
: expected
);
1412 int freq_in
, freq_latch
;
1417 for (e
= loop
->header
->pred
; e
; e
= e
->pred_next
)
1418 if (e
->src
== loop
->latch
)
1419 freq_latch
= EDGE_FREQUENCY (e
);
1421 freq_in
+= EDGE_FREQUENCY (e
);
1426 return (freq_latch
+ freq_in
- 1) / freq_in
;
1430 /* This function checks if an instruction is a branch and count instruction
1431 no matter if the flag HAVE_doloop_end is enabled or not. An alternative
1432 would be the modification of doloop_condition_get function itself. */
1434 is_bct_cond (rtx insn
)
1436 if (GET_CODE (insn
) != JUMP_INSN
)
1439 #ifdef HAVE_doloop_end
1440 if (!doloop_condition_get (PATTERN(insn
)))
1449 /* Extract the increment of the count register from the branch and count
1452 get_var_set_from_bct (rtx insn
)
1457 pattern
= PATTERN (insn
);
1459 if (!is_bct_cond (insn
))
1462 set
= XVECEXP (pattern
, 0, 1);
1464 /* IA64 has the decrement conditional, i.e. done only when the loop does not
1465 end. We match (set (x (if_then_else (ne x 0) (plus x -1) x))) here. */
1467 lhs
= XEXP (set
, 0);
1468 rhs
= XEXP (set
, 1);
1469 if (GET_CODE (set
) != IF_THEN_ELSE
)
1472 cond
= XEXP (rhs
, 0);
1473 if (GET_CODE (cond
) != NE
1474 || !rtx_equal_p (XEXP (cond
, 0), lhs
)
1475 || !rtx_equal_p (XEXP (cond
, 1), const0_rtx
))
1478 rhs
= XEXP (rhs
, 1);
1480 return gen_rtx_SET (GET_MODE (lhs
), lhs
, rhs
);