1 /* Natural loop analysis code for GNU compiler.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "expr.h"
30 #include "output.h"
32 struct unmark_altered_insn_data;
33 static void unmark_altered (rtx, rtx, regset);
34 static void blocks_invariant_registers (basic_block *, int, regset);
35 static void unmark_altered_insn (rtx, rtx, struct unmark_altered_insn_data *);
36 static void blocks_single_set_registers (basic_block *, int, rtx *);
37 static int invariant_rtx_wrto_regs_p_helper (rtx *, regset);
38 static bool invariant_rtx_wrto_regs_p (rtx, regset);
39 static rtx test_for_iteration (struct loop_desc *desc, unsigned HOST_WIDE_INT);
40 static bool constant_iterations (struct loop_desc *, unsigned HOST_WIDE_INT *,
41 bool *);
42 static bool simple_loop_exit_p (struct loop *, edge, regset,
43 rtx *, struct loop_desc *);
44 static rtx variable_initial_value (rtx, regset, rtx, rtx *, enum machine_mode);
45 static rtx variable_initial_values (edge, rtx, enum machine_mode);
46 static bool simple_condition_p (struct loop *, rtx, regset,
47 struct loop_desc *);
48 static basic_block simple_increment (struct loop *, rtx *, struct loop_desc *);
49 static rtx count_strange_loop_iterations (rtx, rtx, enum rtx_code,
50 int, rtx, enum machine_mode,
51 enum machine_mode);
52 static unsigned HOST_WIDEST_INT inverse (unsigned HOST_WIDEST_INT, int);
53 static bool fits_in_mode_p (enum machine_mode mode, rtx expr);
55 /* Computes inverse to X modulo (1 << MOD). */
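/* How this works (a brief illustrative note): for odd X the loop below
   computes X ** (2**(MOD-1) - 1) by repeated squaring.  Because the
   multiplicative order of any odd residue modulo 2**MOD divides 2**(MOD-1),
   multiplying the result by X gives 1, so the result is the inverse.  X is
   assumed to be odd; even values have no inverse.  E.g. for X = 3, MOD = 4:
   3**7 mod 16 = 11, and 3 * 11 = 33 == 1 (mod 16).  */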
56 static unsigned HOST_WIDEST_INT
57 inverse (unsigned HOST_WIDEST_INT x, int mod)
59 unsigned HOST_WIDEST_INT mask =
60 ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1;
61 unsigned HOST_WIDEST_INT rslt = 1;
62 int i;
64 for (i = 0; i < mod - 1; i++)
66 rslt = (rslt * x) & mask;
67 x = (x * x) & mask;
70 return rslt;
73 /* Checks whether BB is executed exactly once in each LOOP iteration. */
74 bool
75 just_once_each_iteration_p (struct loop *loop, basic_block bb)
77 /* It must be executed at least once each iteration. */
78 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
79 return false;
81 /* And just once. */
82 if (bb->loop_father != loop)
83 return false;
85 /* But this was not enough. We might have some irreducible loop here. */
86 if (bb->flags & BB_IRREDUCIBLE_LOOP)
87 return false;
89 return true;
93 /* Unmarks modified registers; helper to blocks_invariant_registers. */
94 static void
95 unmark_altered (rtx what, rtx by ATTRIBUTE_UNUSED, regset regs)
97 if (GET_CODE (what) == SUBREG)
98 what = SUBREG_REG (what);
99 if (!REG_P (what))
100 return;
101 CLEAR_REGNO_REG_SET (regs, REGNO (what));
104 /* Marks registers that are invariant inside blocks BBS. */
105 static void
106 blocks_invariant_registers (basic_block *bbs, int nbbs, regset regs)
108 rtx insn;
109 int i;
111 for (i = 0; i < max_reg_num (); i++)
112 SET_REGNO_REG_SET (regs, i);
113 for (i = 0; i < nbbs; i++)
114 for (insn = BB_HEAD (bbs[i]);
115 insn != NEXT_INSN (BB_END (bbs[i]));
116 insn = NEXT_INSN (insn))
117 if (INSN_P (insn))
118 note_stores (PATTERN (insn),
119 (void (*) (rtx, rtx, void *)) unmark_altered,
120 regs);
123 /* Unmarks modified registers; helper to blocks_single_set_registers. */
124 struct unmark_altered_insn_data
126 rtx *regs;
127 rtx insn;
130 static void
131 unmark_altered_insn (rtx what, rtx by ATTRIBUTE_UNUSED,
132 struct unmark_altered_insn_data *data)
134 int rn;
136 if (GET_CODE (what) == SUBREG)
137 what = SUBREG_REG (what);
138 if (!REG_P (what))
139 return;
140 rn = REGNO (what);
141 if (data->regs[rn] == data->insn)
142 return;
143 data->regs[rn] = NULL;
146 /* Marks registers that have just a single simple set in BBS; the relevant
147 insn is returned in REGS. */
148 static void
149 blocks_single_set_registers (basic_block *bbs, int nbbs, rtx *regs)
151 rtx insn;
152 int i;
153 struct unmark_altered_insn_data data;
155 for (i = 0; i < max_reg_num (); i++)
156 regs[i] = NULL;
158 for (i = 0; i < nbbs; i++)
159 for (insn = BB_HEAD (bbs[i]);
160 insn != NEXT_INSN (BB_END (bbs[i]));
161 insn = NEXT_INSN (insn))
163 rtx set = single_set (insn);
164 if (!set)
165 continue;
166 if (!REG_P (SET_DEST (set)))
167 continue;
168 regs[REGNO (SET_DEST (set))] = insn;
171 data.regs = regs;
172 for (i = 0; i < nbbs; i++)
173 for (insn = BB_HEAD (bbs[i]);
174 insn != NEXT_INSN (BB_END (bbs[i]));
175 insn = NEXT_INSN (insn))
177 if (!INSN_P (insn))
178 continue;
179 data.insn = insn;
180 note_stores (PATTERN (insn),
181 (void (*) (rtx, rtx, void *)) unmark_altered_insn,
182 &data);
186 /* Helper for invariant_rtx_wrto_regs_p. */
187 static int
188 invariant_rtx_wrto_regs_p_helper (rtx *expr, regset invariant_regs)
190 switch (GET_CODE (*expr))
192 case CC0:
193 case PC:
194 case UNSPEC_VOLATILE:
195 return 1;
197 case CONST_INT:
198 case CONST_DOUBLE:
199 case CONST:
200 case SYMBOL_REF:
201 case LABEL_REF:
202 return 0;
204 case ASM_OPERANDS:
205 return MEM_VOLATILE_P (*expr);
207 case MEM:
208 /* If the memory is not constant, assume it is modified. If it is
209 constant, we still have to check the address. */
210 return !RTX_UNCHANGING_P (*expr);
212 case REG:
213 return !REGNO_REG_SET_P (invariant_regs, REGNO (*expr));
215 default:
216 return 0;
220 /* Checks that EXPR is invariant provided that INVARIANT_REGS are invariant. */
221 static bool
222 invariant_rtx_wrto_regs_p (rtx expr, regset invariant_regs)
224 return !for_each_rtx (&expr, (rtx_function) invariant_rtx_wrto_regs_p_helper,
225 invariant_regs);
228 /* Checks whether CONDITION is a simple comparison in which one of the operands
229 is a register and the other one is invariant in the LOOP. Fills in the var, lim
230 and cond fields of DESC. */
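/* For illustration (hypothetical input, not taken from the sources): given a
   condition like (lt (reg i) (reg n)) where n is loop invariant, desc->var is
   set to i, desc->lim to n and desc->cond to LT; if the operands appear the
   other way round, the comparison code is swapped accordingly.  */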
231 static bool
232 simple_condition_p (struct loop *loop ATTRIBUTE_UNUSED, rtx condition,
233 regset invariant_regs, struct loop_desc *desc)
235 rtx op0, op1;
237 /* Check condition. */
238 switch (GET_CODE (condition))
240 case EQ:
241 case NE:
242 case LE:
243 case LT:
244 case GE:
245 case GT:
246 case GEU:
247 case GTU:
248 case LEU:
249 case LTU:
250 break;
251 default:
252 return false;
255 /* Of integers or pointers. */
256 if (GET_MODE_CLASS (GET_MODE (XEXP (condition, 0))) != MODE_INT
257 && GET_MODE_CLASS (GET_MODE (XEXP (condition, 0))) != MODE_PARTIAL_INT)
258 return false;
260 /* One of operands must be a simple register. */
261 op0 = XEXP (condition, 0);
262 op1 = XEXP (condition, 1);
264 /* One of operands must be invariant. */
265 if (invariant_rtx_wrto_regs_p (op0, invariant_regs))
267 /* And the other one must be a register. */
268 if (!REG_P (op1))
269 return false;
270 desc->var = op1;
271 desc->lim = op0;
273 desc->cond = swap_condition (GET_CODE (condition));
274 if (desc->cond == UNKNOWN)
275 return false;
276 return true;
279 /* Check the other operand. */
280 if (!invariant_rtx_wrto_regs_p (op1, invariant_regs))
281 return false;
282 if (!REG_P (op0))
283 return false;
285 desc->var = op0;
286 desc->lim = op1;
288 desc->cond = GET_CODE (condition);
290 return true;
293 /* Checks whether DESC->var is incremented/decremented exactly once each
294 iteration. Fills in DESC->stride and returns the block in which DESC->var is
295 modified. */
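/* For illustration (hypothetical example): an increment insn whose single set
   is (set (reg i) (plus (reg i) (const_int 4))) is accepted and DESC->stride
   becomes (const_int 4); anything that is not a PLUS of the variable and a
   constant is rejected.  */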
296 static basic_block
297 simple_increment (struct loop *loop, rtx *simple_increment_regs,
298 struct loop_desc *desc)
300 rtx mod_insn, mod_insn1, set, set_src, set_add;
301 basic_block mod_bb, mod_bb1;
303 /* Find insn that modifies var. */
304 mod_insn = simple_increment_regs[REGNO (desc->var)];
305 if (!mod_insn)
306 return NULL;
307 mod_bb = BLOCK_FOR_INSN (mod_insn);
309 /* Check that it is executed exactly once each iteration. */
310 if (!just_once_each_iteration_p (loop, mod_bb))
311 return NULL;
313 /* mod_insn must be a simple increment/decrement. */
314 set = single_set (mod_insn);
315 if (!set)
316 abort ();
317 if (!rtx_equal_p (SET_DEST (set), desc->var))
318 abort ();
320 set_src = find_reg_equal_equiv_note (mod_insn);
321 if (!set_src)
322 set_src = SET_SRC (set);
324 /* Check for variables that iterate in narrower mode. */
325 if (GET_CODE (set_src) == SIGN_EXTEND
326 || GET_CODE (set_src) == ZERO_EXTEND)
328 /* If we are sign extending a variable that is then compared unsigned
329 or vice versa, something weird is happening. */
330 if (desc->cond != EQ
331 && desc->cond != NE
332 && ((desc->cond == LEU
333 || desc->cond == LTU
334 || desc->cond == GEU
335 || desc->cond == GTU)
336 ^ (GET_CODE (set_src) == ZERO_EXTEND)))
337 return NULL;
339 if (GET_CODE (XEXP (set_src, 0)) != SUBREG
340 || SUBREG_BYTE (XEXP (set_src, 0)) != 0
341 || GET_MODE (SUBREG_REG (XEXP (set_src, 0))) != GET_MODE (desc->var))
342 return NULL;
344 desc->inner_mode = GET_MODE (XEXP (set_src, 0));
345 desc->extend = GET_CODE (set_src);
346 set_src = SUBREG_REG (XEXP (set_src, 0));
348 if (GET_CODE (set_src) != REG)
349 return NULL;
351 /* Find where the reg is set. */
352 mod_insn1 = simple_increment_regs[REGNO (set_src)];
353 if (!mod_insn1)
354 return NULL;
356 mod_bb1 = BLOCK_FOR_INSN (mod_insn1);
357 if (!dominated_by_p (CDI_DOMINATORS, mod_bb, mod_bb1))
358 return NULL;
359 if (mod_bb1 == mod_bb)
361 for (;
362 mod_insn != PREV_INSN (BB_HEAD (mod_bb));
363 mod_insn = PREV_INSN (mod_insn))
364 if (mod_insn == mod_insn1)
365 break;
367 if (mod_insn == PREV_INSN (BB_HEAD (mod_bb)))
368 return NULL;
371 /* Replace the source with the possible place of increment. */
372 set = single_set (mod_insn1);
373 if (!set)
374 abort ();
375 if (!rtx_equal_p (SET_DEST (set), set_src))
376 abort ();
378 set_src = find_reg_equal_equiv_note (mod_insn1);
379 if (!set_src)
380 set_src = SET_SRC (set);
382 else
384 desc->inner_mode = GET_MODE (desc->var);
385 desc->extend = NIL;
388 if (GET_CODE (set_src) != PLUS)
389 return NULL;
390 if (!rtx_equal_p (XEXP (set_src, 0), desc->var))
391 return NULL;
393 /* Set desc->stride. */
394 set_add = XEXP (set_src, 1);
395 if (CONSTANT_P (set_add))
396 desc->stride = set_add;
397 else
398 return NULL;
400 return mod_bb;
403 /* Tries to find the initial value of VAR in INSN. This value must be invariant
404 with respect to INVARIANT_REGS. If SET_INSN is not NULL, the insn in which VAR
405 is set is stored there. INNER_MODE is the mode in which the induction variable VAR iterates. */
406 static rtx
407 variable_initial_value (rtx insn, regset invariant_regs,
408 rtx var, rtx *set_insn, enum machine_mode inner_mode)
410 basic_block bb;
411 rtx set;
412 rtx ret = NULL;
414 /* Go back through cfg. */
415 bb = BLOCK_FOR_INSN (insn);
416 while (1)
418 for (; insn != BB_HEAD (bb); insn = PREV_INSN (insn))
420 if (INSN_P (insn))
421 note_stores (PATTERN (insn),
422 (void (*) (rtx, rtx, void *)) unmark_altered,
423 invariant_regs);
424 if (modified_between_p (var, PREV_INSN (insn), NEXT_INSN (insn)))
425 break;
428 if (insn != BB_HEAD (bb))
430 /* We found place where var is set. */
431 rtx set_dest;
432 rtx val;
433 rtx note;
435 set = single_set (insn);
436 if (!set)
437 return NULL;
438 set_dest = SET_DEST (set);
439 if (!rtx_equal_p (set_dest, var))
440 return NULL;
442 note = find_reg_equal_equiv_note (insn);
443 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST)
444 val = XEXP (note, 0);
445 else
446 val = SET_SRC (set);
448 /* If we know that the initial value is indeed in range of
449 the inner mode, record the fact even if the value itself
450 is useless. */
451 if ((GET_CODE (val) == SIGN_EXTEND
452 || GET_CODE (val) == ZERO_EXTEND)
453 && GET_MODE (XEXP (val, 0)) == inner_mode)
454 ret = gen_rtx_fmt_e (GET_CODE (val),
455 GET_MODE (var),
456 gen_rtx_fmt_ei (SUBREG,
457 inner_mode,
458 var, 0));
460 if (!invariant_rtx_wrto_regs_p (val, invariant_regs))
461 return ret;
463 if (set_insn)
464 *set_insn = insn;
465 return val;
469 if (bb->pred->pred_next || bb->pred->src == ENTRY_BLOCK_PTR)
470 return NULL;
472 bb = bb->pred->src;
473 insn = BB_END (bb);
476 return NULL;
479 /* Returns a list of definitions of the initial value of VAR at edge E. INNER_MODE
480 is the mode in which the induction variable VAR really iterates. */
481 static rtx
482 variable_initial_values (edge e, rtx var, enum machine_mode inner_mode)
484 rtx set_insn, list;
485 regset invariant_regs;
486 regset_head invariant_regs_head;
487 int i;
489 invariant_regs = INITIALIZE_REG_SET (invariant_regs_head);
490 for (i = 0; i < max_reg_num (); i++)
491 SET_REGNO_REG_SET (invariant_regs, i);
493 list = alloc_EXPR_LIST (0, copy_rtx (var), NULL);
495 if (e->src == ENTRY_BLOCK_PTR)
496 return list;
498 set_insn = BB_END (e->src);
499 while (REG_P (var)
500 && (var = variable_initial_value (set_insn, invariant_regs, var,
501 &set_insn, inner_mode)))
502 list = alloc_EXPR_LIST (0, copy_rtx (var), list);
504 FREE_REG_SET (invariant_regs);
505 return list;
508 /* Computes the constant number of iterations of the loop described by DESC;
509 returns false if it cannot be determined. */
510 static bool
511 constant_iterations (struct loop_desc *desc, unsigned HOST_WIDE_INT *niter,
512 bool *may_be_zero)
514 rtx test, expr;
515 rtx ainit, alim;
517 test = test_for_iteration (desc, 0);
518 if (test == const0_rtx)
520 *niter = 0;
521 *may_be_zero = false;
522 return true;
525 *may_be_zero = (test != const_true_rtx);
527 /* It would make little sense to check every initial value against every
528 limit when we know that all but the first alternatives are simply registers. */
529 for (ainit = desc->var_alts; ainit; ainit = XEXP (ainit, 1))
531 alim = XEXP (desc->lim_alts, 0);
532 if (!(expr = count_loop_iterations (desc, XEXP (ainit, 0), alim)))
533 continue;
534 if (GET_CODE (expr) == CONST_INT)
536 *niter = INTVAL (expr);
537 return true;
540 for (alim = XEXP (desc->lim_alts, 1); alim; alim = XEXP (alim, 1))
542 ainit = XEXP (desc->var_alts, 0);
543 if (!(expr = count_loop_iterations (desc, ainit, XEXP (alim, 0))))
544 continue;
545 if (GET_CODE (expr) == CONST_INT)
547 *niter = INTVAL (expr);
548 return true;
552 return false;
555 /* Attempts to determine the number of iterations of a "strange" loop.
556 Its induction variable starts with value INIT and is compared by COND
557 with LIM. If POSTINCR, it is incremented after the test. It is incremented
558 by STRIDE each iteration, has mode MODE but iterates in INNER_MODE.
560 By "strange" we mean loops where the induction variable increases in the wrong
561 direction with respect to the comparison, e.g. for (i = 6; i > 5; i++). */
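/* A worked example (illustrative only): for an 8-bit unsigned variable with
   INIT = 6, COND = GTU, LIM = 5, STRIDE = 1 and POSTINCR, the variable passes
   the test for the values 6 .. 255 and then wraps to 0, which fails the test;
   the code below computes n_to_wrap = (255 - 6) / 1 = 249 and returns
   249 + 1 = 250 iterations.  */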
562 static rtx
563 count_strange_loop_iterations (rtx init, rtx lim, enum rtx_code cond,
564 int postincr, rtx stride, enum machine_mode mode,
565 enum machine_mode inner_mode)
567 rtx rqmt, n_to_wrap, before_wrap, after_wrap;
568 rtx mode_min, mode_max;
569 int size;
571 /* This could be handled, but it is not important enough to lose time with
572 it just now. */
573 if (mode != inner_mode)
574 return NULL_RTX;
576 if (!postincr)
577 init = simplify_gen_binary (PLUS, mode, init, stride);
579 /* If we are able to prove that we don't pass the first test, we are
580 done. */
581 rqmt = simplify_relational_operation (cond, mode, init, lim);
582 if (rqmt == const0_rtx)
583 return const0_rtx;
585 /* And if we don't know that we pass it, things are too complicated for us. */
586 if (rqmt != const_true_rtx)
587 return NULL_RTX;
589 switch (cond)
591 case GE:
592 case GT:
593 case LE:
594 case LT:
595 size = GET_MODE_BITSIZE (mode);
596 mode_min = GEN_INT (-((unsigned HOST_WIDEST_INT) 1 << (size - 1)));
597 mode_max = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1)) - 1);
599 break;
601 case GEU:
602 case GTU:
603 case LEU:
604 case LTU:
605 case EQ:
606 mode_min = const0_rtx;
607 mode_max = simplify_gen_binary (MINUS, mode, const0_rtx, const1_rtx);
608 break;
610 default:
611 abort ();
614 switch (cond)
616 case EQ:
617 /* This iterates once, as init == lim. */
618 return const1_rtx;
620 /* The behavior is undefined in signed cases. Never mind, we still
621 try to behave sanely. */
622 case GE:
623 case GT:
624 case GEU:
625 case GTU:
626 if (INTVAL (stride) <= 0)
627 abort ();
628 n_to_wrap = simplify_gen_binary (MINUS, mode, mode_max, copy_rtx (init));
629 n_to_wrap = simplify_gen_binary (UDIV, mode, n_to_wrap, stride);
630 before_wrap = simplify_gen_binary (MULT, mode,
631 copy_rtx (n_to_wrap), stride);
632 before_wrap = simplify_gen_binary (PLUS, mode,
633 before_wrap, copy_rtx (init));
634 after_wrap = simplify_gen_binary (PLUS, mode,
635 before_wrap, stride);
636 if (GET_CODE (after_wrap) != CONST_INT)
638 after_wrap = simplify_gen_binary (PLUS, mode, mode_min, stride);
639 after_wrap = simplify_gen_binary (MINUS, mode, after_wrap, const1_rtx);
641 break;
643 case LE:
644 case LT:
645 case LEU:
646 case LTU:
647 if (INTVAL (stride) >= 0)
648 abort ();
649 stride = simplify_gen_unary (NEG, mode, stride, mode);
650 n_to_wrap = simplify_gen_binary (MINUS, mode, copy_rtx (init), mode_min);
651 n_to_wrap = simplify_gen_binary (UDIV, mode, n_to_wrap, stride);
652 before_wrap = simplify_gen_binary (MULT, mode,
653 copy_rtx (n_to_wrap), stride);
654 before_wrap = simplify_gen_binary (MINUS, mode,
655 copy_rtx (init), before_wrap);
656 after_wrap = simplify_gen_binary (MINUS, mode,
657 before_wrap, stride);
658 if (GET_CODE (after_wrap) != CONST_INT)
660 after_wrap = simplify_gen_binary (MINUS, mode, mode_max, stride);
661 after_wrap = simplify_gen_binary (PLUS, mode, after_wrap, const1_rtx);
663 break;
664 default:
665 abort ();
668 /* If this is const_true_rtx and we did not take a conservative approximation
669 of after_wrap above, we might iterate the calculation (but of course we
670 would have to be careful about infinite loops). Ignore this for now. */
671 rqmt = simplify_relational_operation (cond, mode, after_wrap, lim);
672 if (rqmt != const0_rtx)
673 return NULL_RTX;
675 return simplify_gen_binary (PLUS, mode, n_to_wrap, const1_rtx);
678 /* Checks whether value of EXPR fits into range of MODE. */
679 static bool
680 fits_in_mode_p (enum machine_mode mode, rtx expr)
682 unsigned HOST_WIDEST_INT val;
683 int n_bits = 0;
685 if (GET_CODE (expr) == CONST_INT)
687 for (val = INTVAL (expr); val; val >>= 1)
688 n_bits++;
690 return n_bits <= GET_MODE_BITSIZE (mode);
693 if (GET_CODE (expr) == SIGN_EXTEND
694 || GET_CODE (expr) == ZERO_EXTEND)
695 return GET_MODE (XEXP (expr, 0)) == mode;
697 return false;
700 /* Return an RTX expression representing the number of iterations of the loop as
701 bounded by the test described by DESC (in case the loop really has multiple exit
702 edges, fewer iterations may happen in practice).
704 Return NULL if it is unknown. Additionally the value may be invalid for a
705 paradoxical loop (let's define paradoxical loops as loops whose test fails
706 at the -1th iteration, for instance "for (i=5;i<1;i++);").
708 These cases need to be handled either by copying the loop test to the front
709 of the loop or by keeping the test in the first iteration of the loop.
711 When INIT/LIM are set, they are used instead of var/lim of DESC. */
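/* For illustration (hypothetical example): for a loop equivalent to
   "for (i = 0; i < 100; i++)" with the increment performed after the exit
   test (desc->postincr), the code below simplifies the difference
   lim - init = 100 and, since the stride is 1 and the condition is LT,
   returns (const_int 100).  */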
712 rtx
713 count_loop_iterations (struct loop_desc *desc, rtx init, rtx lim)
715 enum rtx_code cond = desc->cond;
716 rtx stride = desc->stride;
717 rtx mod, exp, ainit, bound;
718 rtx overflow_check, mx, mxp;
719 enum machine_mode mode = GET_MODE (desc->var);
720 unsigned HOST_WIDEST_INT s, size, d;
722 /* Give up on floating point modes and friends. It might be possible to do
723 the job for constant loop bounds, but it is probably not worthwhile. */
724 if (!INTEGRAL_MODE_P (mode))
725 return NULL;
727 init = copy_rtx (init ? init : desc->var);
728 lim = copy_rtx (lim ? lim : desc->lim);
730 /* Ensure that we always handle the condition to stay inside loop. */
731 if (desc->neg)
732 cond = reverse_condition (cond);
734 if (desc->inner_mode != mode)
736 /* We have a case where the variable in fact iterates in a narrower
737 mode. This has the following consequences:
739 For the induction variable itself, if !desc->postincr, it does not mean
740 anything too special, since we know the variable is already in the range
741 of the inner mode when we compare it (so we just need to shorten
742 it into that mode before calculations are done, so that we don't risk
743 wrong results). A more complicated case is when desc->postincr; then
744 the first two iterations are special (the first one because the value
745 may be out of range, the second one because after shortening it to the
746 range it may have absolutely any value), and we do not handle this in
747 unrolling. So if we aren't able to prove that the initial value is in
748 the range, we fail in this case.
750 The step is simply reduced modulo the size of the inner mode.
752 If lim is out of range, then either the loop is infinite (and then
753 we may unroll it however we like), or it exits in the first iteration
754 (this is also OK, since we handle that case specially anyway).
755 So we may safely assume that it fits into the inner mode. */
757 for (ainit = desc->var_alts; ainit; ainit = XEXP (ainit, 1))
758 if (fits_in_mode_p (desc->inner_mode, XEXP (ainit, 0)))
759 break;
761 if (!ainit)
763 if (desc->postincr)
764 return NULL_RTX;
766 init = simplify_gen_unary (desc->extend,
767 mode,
768 simplify_gen_subreg (desc->inner_mode,
769 init,
770 mode,
772 desc->inner_mode);
775 stride = simplify_gen_subreg (desc->inner_mode, stride, mode, 0);
776 if (stride == const0_rtx)
777 return NULL_RTX;
780 /* Prepare condition to verify that we do not risk overflow. */
781 if (stride == const1_rtx
782 || stride == constm1_rtx
783 || cond == NE
784 || cond == EQ)
786 /* Overflow cannot occur for NE conditions. The EQ condition
787 is weird and is handled in count_strange_loop_iterations.
788 If stride is 1, overflow may occur only for <= and >= conditions,
789 and then the loop is infinite, so it does not bother us. */
790 overflow_check = const0_rtx;
792 else
794 if (cond == LT || cond == LTU)
795 mx = simplify_gen_binary (MINUS, mode, lim, const1_rtx);
796 else if (cond == GT || cond == GTU)
797 mx = simplify_gen_binary (PLUS, mode, lim, const1_rtx);
798 else
799 mx = lim;
800 if (mode != desc->inner_mode)
801 mxp = simplify_gen_subreg (desc->inner_mode, mx, mode, 0);
802 else
803 mxp = mx;
804 mxp = simplify_gen_binary (PLUS, desc->inner_mode, mxp, stride);
805 if (mode != desc->inner_mode)
806 mxp = simplify_gen_unary (desc->extend, mode, mxp, desc->inner_mode);
807 overflow_check = simplify_gen_relational (cond, SImode, mode, mx, mxp);
810 /* Compute absolute value of the difference of initial and final value. */
811 if (INTVAL (stride) > 0)
813 /* Handle strange tests specially. */
814 if (cond == EQ || cond == GE || cond == GT || cond == GEU
815 || cond == GTU)
816 return count_strange_loop_iterations (init, lim, cond, desc->postincr,
817 stride, mode, desc->inner_mode);
818 exp = simplify_gen_binary (MINUS, mode, lim, init);
820 else
822 if (cond == EQ || cond == LE || cond == LT || cond == LEU
823 || cond == LTU)
824 return count_strange_loop_iterations (init, lim, cond, desc->postincr,
825 stride, mode, desc->inner_mode);
826 exp = simplify_gen_binary (MINUS, mode, init, lim);
827 stride = simplify_gen_unary (NEG, mode, stride, mode);
830 /* If there is a risk of overflow (i.e. when we increment value satisfying
831 a condition, we may again obtain a value satisfying the condition),
832 fail. */
833 if (overflow_check != const0_rtx)
834 return NULL_RTX;
836 /* Normalize difference so the value is always first examined
837 and later incremented. */
838 if (!desc->postincr)
839 exp = simplify_gen_binary (MINUS, mode, exp, stride);
841 /* Determine delta caused by exit condition. */
842 switch (cond)
844 case NE:
845 /* NE tests are easy to handle, because we just perform simple
846 arithmetic modulo a power of 2. Let's use this fact to compute the
847 number of iterations exactly. We are now in a situation where we want to
848 solve the equation stride * i = c (mod size of inner_mode).
849 Let gcd (stride, size of mode) = d. If d does not divide c, the
850 loop is infinite. Otherwise, the number of iterations is
851 (inverse(s/d) * (c/d)) mod (size of mode/d). */
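/* A worked example (illustrative numbers only): with an 8-bit inner mode,
   stride s = 6 and difference c = 10 we get d = 2, s/d = 3 and c/d = 5;
   the inverse of 3 modulo 2**7 is 43, so the number of iterations is
   (43 * 5) mod 128 = 87, and indeed 6 * 87 == 522 == 10 (mod 256).  */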
852 size = GET_MODE_BITSIZE (desc->inner_mode);
853 s = INTVAL (stride);
854 d = 1;
855 while (s % 2 != 1)
857 s /= 2;
858 d *= 2;
859 size--;
861 bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1 ) << 1) - 1);
862 exp = simplify_gen_binary (UDIV, mode, exp, GEN_INT (d));
863 exp = simplify_gen_binary (MULT, mode,
864 exp, GEN_INT (inverse (s, size)));
865 exp = simplify_gen_binary (AND, mode, exp, bound);
866 break;
868 case LT:
869 case GT:
870 case LTU:
871 case GTU:
872 break;
873 case LE:
874 case GE:
875 case LEU:
876 case GEU:
877 exp = simplify_gen_binary (PLUS, mode, exp, const1_rtx);
878 break;
879 default:
880 abort ();
883 if (cond != NE && stride != const1_rtx)
885 /* The number of iterations is now (EXP + STRIDE - 1) / STRIDE,
886 but we need to take care of overflows. */
888 mod = simplify_gen_binary (UMOD, mode, exp, stride);
890 /* This is a dirty trick. When we can't compute the number of iterations
891 as a constant, we simply ignore the possible overflow, as the
892 runtime unroller always uses power-of-2 amounts and does not
893 care about possible lost bits. */
895 if (GET_CODE (mod) != CONST_INT)
897 rtx stridem1 = simplify_gen_binary (PLUS, mode, stride, constm1_rtx);
898 exp = simplify_gen_binary (PLUS, mode, exp, stridem1);
899 exp = simplify_gen_binary (UDIV, mode, exp, stride);
901 else
903 exp = simplify_gen_binary (UDIV, mode, exp, stride);
904 if (mod != const0_rtx)
905 exp = simplify_gen_binary (PLUS, mode, exp, const1_rtx);
909 if (rtl_dump_file)
911 fprintf (rtl_dump_file, "; Number of iterations: ");
912 print_simple_rtl (rtl_dump_file, exp);
913 fprintf (rtl_dump_file, "\n");
916 return exp;
919 /* Return a simplified RTX expression representing the value of the test
920 described by DESC at the given iteration of the loop. */
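/* For illustration (hypothetical example): if the first alternative initial
   value recorded in desc->var_alts is (const_int 0), the stride is
   (const_int 1), lim is (const_int 100), the condition is LT and
   desc->postincr is set, then test_for_iteration (desc, 0) simplifies
   (lt 0 100) to const_true_rtx, i.e. the loop body is entered at least
   once.  */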
922 static rtx
923 test_for_iteration (struct loop_desc *desc, unsigned HOST_WIDE_INT iter)
925 enum rtx_code cond = desc->cond;
926 rtx exp = XEXP (desc->var_alts, 0);
927 rtx addval;
929 /* Give up on floating point modes and friends. It might be possible to do
930 the job for constant loop bounds, but it is probably not worthwhile. */
931 if (!INTEGRAL_MODE_P (GET_MODE (desc->var)))
932 return NULL;
934 /* Ensure that we always handle the condition to stay inside loop. */
935 if (desc->neg)
936 cond = reverse_condition (cond);
938 /* Compute the value of induction variable. */
939 addval = simplify_gen_binary (MULT, GET_MODE (desc->var),
940 desc->stride,
941 gen_int_mode (desc->postincr
942 ? iter : iter + 1,
943 GET_MODE (desc->var)));
944 exp = simplify_gen_binary (PLUS, GET_MODE (desc->var), exp, addval);
945 /* Test at given condition. */
946 exp = simplify_gen_relational (cond, SImode,
947 GET_MODE (desc->var), exp, desc->lim);
949 if (rtl_dump_file)
951 fprintf (rtl_dump_file, "; Conditional to continue loop at "
952 HOST_WIDE_INT_PRINT_UNSIGNED "th iteration: ", iter);
953 print_simple_rtl (rtl_dump_file, exp);
954 fprintf (rtl_dump_file, "\n");
956 return exp;
960 /* Tests whether the exit at EXIT_EDGE from LOOP is simple. Returns the simple loop
961 description joined to it in DESC. INVARIANT_REGS and SINGLE_SET_REGS
962 are the results of blocks_{invariant,single_set}_registers over BODY. */
963 static bool
964 simple_loop_exit_p (struct loop *loop, edge exit_edge,
965 regset invariant_regs, rtx *single_set_regs,
966 struct loop_desc *desc)
968 basic_block mod_bb, exit_bb;
969 int fallthru_out;
970 rtx condition;
971 edge ei, e;
973 exit_bb = exit_edge->src;
975 fallthru_out = (exit_edge->flags & EDGE_FALLTHRU);
977 if (!exit_bb)
978 return false;
980 /* It must be tested (at least) once during any iteration. */
981 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
982 return false;
984 /* It must end in a simple conditional jump. */
985 if (!any_condjump_p (BB_END (exit_bb)))
986 return false;
988 ei = exit_bb->succ;
989 if (ei == exit_edge)
990 ei = ei->succ_next;
992 desc->out_edge = exit_edge;
993 desc->in_edge = ei;
995 /* Condition must be a simple comparison in that one of operands
996 is register and the other one is invariant. */
997 if (!(condition = get_condition (BB_END (exit_bb), NULL, false)))
998 return false;
1000 if (!simple_condition_p (loop, condition, invariant_regs, desc))
1001 return false;
1003 /* Var must be simply incremented or decremented in exactly one insn that
1004 is executed just once every iteration. */
1005 if (!(mod_bb = simple_increment (loop, single_set_regs, desc)))
1006 return false;
1008 /* OK, it is a simple loop. Now just fill in the remaining info. */
1009 desc->postincr = !dominated_by_p (CDI_DOMINATORS, exit_bb, mod_bb);
1010 desc->neg = !fallthru_out;
1012 /* Find initial value of var and alternative values for lim. */
1013 e = loop_preheader_edge (loop);
1014 desc->var_alts = variable_initial_values (e, desc->var, desc->inner_mode);
1015 desc->lim_alts = variable_initial_values (e, desc->lim, desc->inner_mode);
1017 /* Number of iterations. */
1018 desc->const_iter =
1019 constant_iterations (desc, &desc->niter, &desc->may_be_zero);
1020 if (!desc->const_iter && !count_loop_iterations (desc, NULL, NULL))
1021 return false;
1022 return true;
1025 /* Tests whether LOOP is a simple "for"-style loop. Returns the simple loop
1026 description in DESC. */
1027 bool
1028 simple_loop_p (struct loop *loop, struct loop_desc *desc)
1030 unsigned i;
1031 basic_block *body;
1032 edge e;
1033 struct loop_desc act;
1034 bool any = false;
1035 regset invariant_regs;
1036 regset_head invariant_regs_head;
1037 rtx *single_set_regs;
1038 int n_branches;
1040 body = get_loop_body (loop);
1042 invariant_regs = INITIALIZE_REG_SET (invariant_regs_head);
1043 single_set_regs = xmalloc (max_reg_num () * sizeof (rtx));
1045 blocks_invariant_registers (body, loop->num_nodes, invariant_regs);
1046 blocks_single_set_registers (body, loop->num_nodes, single_set_regs);
1048 n_branches = 0;
1049 for (i = 0; i < loop->num_nodes; i++)
1051 for (e = body[i]->succ; e; e = e->succ_next)
1052 if (!flow_bb_inside_loop_p (loop, e->dest)
1053 && simple_loop_exit_p (loop, e,
1054 invariant_regs, single_set_regs, &act))
1056 /* Prefer constant iterations; the fewer the better. */
1057 if (!any)
1058 any = true;
1059 else if (!act.const_iter
1060 || (desc->const_iter && act.niter >= desc->niter))
1061 continue;
1062 *desc = act;
1065 if (body[i]->succ && body[i]->succ->succ_next)
1066 n_branches++;
1068 desc->n_branches = n_branches;
1070 if (rtl_dump_file && any)
1072 fprintf (rtl_dump_file, "; Simple loop %i\n", loop->num);
1073 if (desc->postincr)
1074 fprintf (rtl_dump_file,
1075 "; does postincrement after loop exit condition\n");
1077 fprintf (rtl_dump_file, "; Induction variable:");
1078 print_simple_rtl (rtl_dump_file, desc->var);
1079 fputc ('\n', rtl_dump_file);
1081 fprintf (rtl_dump_file, "; Initial values:");
1082 print_simple_rtl (rtl_dump_file, desc->var_alts);
1083 fputc ('\n', rtl_dump_file);
1085 fprintf (rtl_dump_file, "; Stride:");
1086 print_simple_rtl (rtl_dump_file, desc->stride);
1087 fputc ('\n', rtl_dump_file);
1089 fprintf (rtl_dump_file, "; Compared with:");
1090 print_simple_rtl (rtl_dump_file, desc->lim);
1091 fputc ('\n', rtl_dump_file);
1093 fprintf (rtl_dump_file, "; Alternative values:");
1094 print_simple_rtl (rtl_dump_file, desc->lim_alts);
1095 fputc ('\n', rtl_dump_file);
1097 fprintf (rtl_dump_file, "; Exit condition:");
1098 if (desc->neg)
1099 fprintf (rtl_dump_file, "(negated)");
1100 fprintf (rtl_dump_file, "%s\n", GET_RTX_NAME (desc->cond));
1102 fprintf (rtl_dump_file, "; Number of branches:");
1103 fprintf (rtl_dump_file, "%d\n", desc->n_branches);
1105 fputc ('\n', rtl_dump_file);
1108 free (body);
1109 FREE_REG_SET (invariant_regs);
1110 free (single_set_regs);
1111 return any;
1114 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
1115 throw away all latch edges and mark blocks inside any remaining cycle.
1116 Everything is a bit complicated due to the fact that we do not want to do this
1117 for parts of cycles that only "pass" through some loop -- i.e. for
1118 each cycle, we want to mark the blocks that belong directly to the innermost
1119 loop containing the whole cycle. */
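/* For illustration: a cycle formed by two blocks A and B, where both A and B
   can also be entered from outside the cycle, has no unique header and thus
   forms no natural loop; such blocks and the cycle's edges are the ones that
   get BB_IRREDUCIBLE_LOOP and EDGE_IRREDUCIBLE_LOOP set here.  */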
1120 void
1121 mark_irreducible_loops (struct loops *loops)
1123 int *dfs_in, *closed, *mr, *mri, *n_edges, *stack;
1124 unsigned i;
1125 edge **edges, e;
1126 edge *estack;
1127 basic_block act;
1128 int stack_top, tick, depth;
1129 struct loop *cloop;
1131 /* Reset the flags. */
1132 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1134 act->flags &= ~BB_IRREDUCIBLE_LOOP;
1135 for (e = act->succ; e; e = e->succ_next)
1136 e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1139 /* The first last_basic_block + 1 entries are for real blocks (including
1140 entry); then we have loops->num - 1 fake blocks for loops, to which we
1141 assign edges leaving those loops (fake loop 0 is not interesting). */
1142 dfs_in = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1143 closed = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1144 mr = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1145 mri = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1146 n_edges = xmalloc ((last_basic_block + loops->num) * sizeof (int));
1147 edges = xmalloc ((last_basic_block + loops->num) * sizeof (edge *));
1148 stack = xmalloc ((n_basic_blocks + loops->num) * sizeof (int));
1149 estack = xmalloc ((n_basic_blocks + loops->num) * sizeof (edge));
1151 /* Create the edge lists. */
1152 for (i = 0; i < last_basic_block + loops->num; i++)
1153 n_edges[i] = 0;
1154 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1155 for (e = act->succ; e; e = e->succ_next)
1157 /* Ignore edges to exit. */
1158 if (e->dest == EXIT_BLOCK_PTR)
1159 continue;
1160 /* And latch edges. */
1161 if (e->dest->loop_father->header == e->dest
1162 && e->dest->loop_father->latch == act)
1163 continue;
1164 /* Edges inside a single loop should be left where they are. Edges
1165 to subloop headers should lead to representative of the subloop,
1166 but from the same place. */
1167 if (act->loop_father == e->dest->loop_father
1168 || act->loop_father == e->dest->loop_father->outer)
1170 n_edges[act->index + 1]++;
1171 continue;
1173 /* Edges exiting loops remain. They should lead from the representative
1174 of the son of the nearest common ancestor of the loops in which
1175 ACT lies. */
1176 depth = find_common_loop (act->loop_father, e->dest->loop_father)->depth + 1;
1177 if (depth == act->loop_father->depth)
1178 cloop = act->loop_father;
1179 else
1180 cloop = act->loop_father->pred[depth];
1181 n_edges[cloop->num + last_basic_block]++;
1184 for (i = 0; i < last_basic_block + loops->num; i++)
1186 edges[i] = xmalloc (n_edges[i] * sizeof (edge));
1187 n_edges[i] = 0;
1190 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1191 for (e = act->succ; e; e = e->succ_next)
1193 if (e->dest == EXIT_BLOCK_PTR)
1194 continue;
1195 if (e->dest->loop_father->header == e->dest
1196 && e->dest->loop_father->latch == act)
1197 continue;
1198 if (act->loop_father == e->dest->loop_father
1199 || act->loop_father == e->dest->loop_father->outer)
1201 edges[act->index + 1][n_edges[act->index + 1]++] = e;
1202 continue;
1204 depth = find_common_loop (act->loop_father, e->dest->loop_father)->depth + 1;
1205 if (depth == act->loop_father->depth)
1206 cloop = act->loop_father;
1207 else
1208 cloop = act->loop_father->pred[depth];
1209 i = cloop->num + last_basic_block;
1210 edges[i][n_edges[i]++] = e;
1213 /* Compute dfs numbering, starting from loop headers, and mark found
1214 loops. */
1215 tick = 0;
1216 for (i = 0; i < last_basic_block + loops->num; i++)
1218 dfs_in[i] = -1;
1219 closed[i] = 0;
1220 mr[i] = last_basic_block + loops->num;
1221 mri[i] = -1;
1224 stack_top = 0;
1225 for (i = 0; i < loops->num; i++)
1226 if (loops->parray[i])
1228 stack[stack_top] = loops->parray[i]->header->index + 1;
1229 estack[stack_top] = NULL;
1230 stack_top++;
1233 while (stack_top)
1235 int idx, sidx;
1237 idx = stack[stack_top - 1];
1238 if (dfs_in[idx] < 0)
1239 dfs_in[idx] = tick++;
1241 while (n_edges[idx])
1243 e = edges[idx][--n_edges[idx]];
1244 sidx = e->dest->loop_father->header == e->dest
1245 ? e->dest->loop_father->num + last_basic_block
1246 : e->dest->index + 1;
1247 if (closed[sidx])
1249 if (mri[sidx] != -1 && !closed[mri[sidx]])
1251 if (mr[sidx] < mr[idx])
1253 mr[idx] = mr[sidx];
1254 mri[idx] = mri[sidx];
1257 if (mr[sidx] <= dfs_in[idx])
1258 e->flags |= EDGE_IRREDUCIBLE_LOOP;
1260 continue;
1262 if (dfs_in[sidx] < 0)
1264 stack[stack_top] = sidx;
1265 estack[stack_top] = e;
1266 stack_top++;
1267 goto next;
1269 if (dfs_in[sidx] < mr[idx])
1271 mr[idx] = dfs_in[sidx];
1272 mri[idx] = sidx;
1274 e->flags |= EDGE_IRREDUCIBLE_LOOP;
1277 /* Return back. */
1278 closed[idx] = 1;
1279 e = estack[stack_top - 1];
1280 stack_top--;
1281 if (e)
1283 /* Propagate information back. */
1284 sidx = stack[stack_top - 1];
1285 if (mr[sidx] > mr[idx])
1287 mr[sidx] = mr[idx];
1288 mri[sidx] = mri[idx];
1290 if (mr[idx] <= dfs_in[sidx])
1291 e->flags |= EDGE_IRREDUCIBLE_LOOP;
1293 /* Mark the block if relevant. */
1294 if (idx && idx <= last_basic_block && mr[idx] <= dfs_in[idx])
1295 BASIC_BLOCK (idx - 1)->flags |= BB_IRREDUCIBLE_LOOP;
1296 next:;
1299 free (stack);
1300 free (estack);
1301 free (dfs_in);
1302 free (closed);
1303 free (mr);
1304 free (mri);
1305 for (i = 0; i < last_basic_block + loops->num; i++)
1306 free (edges[i]);
1307 free (edges);
1308 free (n_edges);
1309 loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
1312 /* Counts the number of insns inside LOOP. */
1313 int
1314 num_loop_insns (struct loop *loop)
1316 basic_block *bbs, bb;
1317 unsigned i, ninsns = 0;
1318 rtx insn;
1320 bbs = get_loop_body (loop);
1321 for (i = 0; i < loop->num_nodes; i++)
1323 bb = bbs[i];
1324 ninsns++;
1325 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
1326 if (INSN_P (insn))
1327 ninsns++;
1329 free(bbs);
1331 return ninsns;
1334 /* Counts the number of insns executed on average per iteration of LOOP. */
1335 int
1336 average_num_loop_insns (struct loop *loop)
1338 basic_block *bbs, bb;
1339 unsigned i, binsns, ninsns, ratio;
1340 rtx insn;
1342 ninsns = 0;
1343 bbs = get_loop_body (loop);
1344 for (i = 0; i < loop->num_nodes; i++)
1346 bb = bbs[i];
1348 binsns = 1;
1349 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
1350 if (INSN_P (insn))
1351 binsns++;
1353 ratio = loop->header->frequency == 0
1354 ? BB_FREQ_MAX
1355 : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
1356 ninsns += binsns * ratio;
1358 free(bbs);
1360 ninsns /= BB_FREQ_MAX;
1361 if (!ninsns)
1362 ninsns = 1; /* To avoid division by zero. */
1364 return ninsns;
1367 /* Returns the expected number of LOOP iterations.
1368 Computes an upper bound on the number of iterations in case they do not fit an
1369 integer, to help loop peeling heuristics. Uses exact counts if at all possible. */
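/* For illustration: if profile counts are available and the edges entering
   the loop have a total count of 10 while the latch edge has a count of 90,
   the computation below yields (90 + 10 - 1) / 10 = 9.  */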
1370 unsigned
1371 expected_loop_iterations (const struct loop *loop)
1373 edge e;
1375 if (loop->header->count)
1377 gcov_type count_in, count_latch, expected;
1379 count_in = 0;
1380 count_latch = 0;
1382 for (e = loop->header->pred; e; e = e->pred_next)
1383 if (e->src == loop->latch)
1384 count_latch = e->count;
1385 else
1386 count_in += e->count;
1388 if (count_in == 0)
1389 return 0;
1391 expected = (count_latch + count_in - 1) / count_in;
1393 /* Avoid overflows. */
1394 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
1396 else
1398 int freq_in, freq_latch;
1400 freq_in = 0;
1401 freq_latch = 0;
1403 for (e = loop->header->pred; e; e = e->pred_next)
1404 if (e->src == loop->latch)
1405 freq_latch = EDGE_FREQUENCY (e);
1406 else
1407 freq_in += EDGE_FREQUENCY (e);
1409 if (freq_in == 0)
1410 return 0;
1412 return (freq_latch + freq_in - 1) / freq_in;