/* Functions to determine/estimate number of iterations of a loop.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "stor-layout.h"
#include "fold-const.h"
#include "calls.h"
#include "hard-reg-set.h"
#include "function.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tm_p.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "intl.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-data-ref.h"
#include "params.h"
#include "diagnostic-core.h"
#include "tree-inline.h"
#include "tree-pass.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "wide-int-print.h"

#define SWAP(X, Y) do { affine_iv *tmp = (X); (X) = (Y); (Y) = tmp; } while (0)

/* The maximum number of dominator BBs we search for conditions
   of loop header copies we use for simplifying a conditional
   expression.  */
#define MAX_DOMINATORS_TO_WALK 8

/*

   Analysis of number of iterations of an affine exit test.

*/

/* Bounds on some value, BELOW <= X <= UP.  */

typedef struct
{
  mpz_t below, up;
} bounds;

/* Splits expression EXPR to a variable part VAR and constant OFFSET.  */

static void
split_to_var_and_offset (tree expr, tree *var, mpz_t offset)
{
  tree type = TREE_TYPE (expr);
  tree op0, op1;
  bool negate = false;

  *var = expr;
  mpz_set_ui (offset, 0);

  switch (TREE_CODE (expr))
    {
    case MINUS_EXPR:
      negate = true;
      /* Fallthru.  */

    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      if (TREE_CODE (op1) != INTEGER_CST)
	break;

      *var = op0;
      /* Always sign extend the offset.  */
      wi::to_mpz (op1, offset, SIGNED);
      if (negate)
	mpz_neg (offset, offset);
      break;

    case INTEGER_CST:
      *var = build_int_cst_type (type, 0);
      wi::to_mpz (expr, offset, TYPE_SIGN (type));
      break;

    default:
      break;
    }
}

/* Stores estimate on the minimum/maximum value of the expression VAR + OFF
   in TYPE to MIN and MAX.  */

static void
determine_value_range (struct loop *loop, tree type, tree var, mpz_t off,
		       mpz_t min, mpz_t max)
{
  wide_int minv, maxv;
  enum value_range_type rtype = VR_VARYING;

  /* If the expression is a constant, we know its value exactly.  */
  if (integer_zerop (var))
    {
      mpz_set (min, off);
      mpz_set (max, off);
      return;
    }

  get_type_static_bounds (type, min, max);

  /* See if we have some range info from VRP.  */
  if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
    {
      edge e = loop_preheader_edge (loop);
      signop sgn = TYPE_SIGN (type);
      gphi_iterator gsi;

      /* Either for VAR itself...  */
      rtype = get_range_info (var, &minv, &maxv);
      /* Or for PHI results in loop->header where VAR is used as
	 PHI argument from the loop preheader edge.  */
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  wide_int minc, maxc;
	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
	      && (get_range_info (gimple_phi_result (phi), &minc, &maxc)
		  == VR_RANGE))
	    {
	      if (rtype != VR_RANGE)
		{
		  rtype = VR_RANGE;
		  minv = minc;
		  maxv = maxc;
		}
	      else
		{
		  minv = wi::max (minv, minc, sgn);
		  maxv = wi::min (maxv, maxc, sgn);
		  /* If the PHI result range is inconsistent with
		     the VAR range, give up on looking at the PHI
		     results.  This can happen if VR_UNDEFINED is
		     involved.  */
		  if (wi::gt_p (minv, maxv, sgn))
		    {
		      rtype = get_range_info (var, &minv, &maxv);
		      break;
		    }
		}
	    }
	}
      if (rtype == VR_RANGE)
	{
	  mpz_t minm, maxm;
	  gcc_assert (wi::le_p (minv, maxv, sgn));
	  mpz_init (minm);
	  mpz_init (maxm);
	  wi::to_mpz (minv, minm, sgn);
	  wi::to_mpz (maxv, maxm, sgn);
	  mpz_add (minm, minm, off);
	  mpz_add (maxm, maxm, off);
	  /* If the computation may not wrap or off is zero, then this
	     is always fine.  If off is negative and minv + off isn't
	     smaller than type's minimum, or off is positive and
	     maxv + off isn't bigger than type's maximum, use the more
	     precise range too.  */
	  if (nowrap_type_p (type)
	      || mpz_sgn (off) == 0
	      || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
	      || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
	    {
	      mpz_set (min, minm);
	      mpz_set (max, maxm);
	      mpz_clear (minm);
	      mpz_clear (maxm);
	      return;
	    }
	  mpz_clear (minm);
	  mpz_clear (maxm);
	}
    }

  /* If the computation may wrap, we know nothing about the value, except for
     the range of the type.  */
  if (!nowrap_type_p (type))
    return;

  /* Since the addition of OFF does not wrap, if OFF is positive, then we may
     add it to MIN, otherwise to MAX.  */
  if (mpz_sgn (off) < 0)
    mpz_add (max, max, off);
  else
    mpz_add (min, min, off);
}

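/* A minimal standalone sketch (kept out of the build; all numbers are
   hypothetical) of the offset adjustment above: with a VRP range of
   [30, 100] for VAR in an unsigned 8-bit TYPE and OFF = -20,
   minv + off = 10 does not underflow the type minimum 0, so the
   precise range [10, 80] may be used even if TYPE wraps.  */
#if 0
static void
demo_determine_value_range (void)
{
  int type_min = 0, type_max = 255;	/* static bounds of the type */
  int minv = 30, maxv = 100, off = -20;	/* hypothetical VRP range + offset */
  int minm = minv + off, maxm = maxv + off;

  if (off == 0
      || (off < 0 && minm >= type_min)
      || (off > 0 && maxm <= type_max))
    {
      type_min = minm;	/* 10 */
      type_max = maxm;	/* 80 */
    }
}
#endif
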
/* Stores the bounds on the difference of the values of the expressions
   (var + X) and (var + Y), computed in TYPE, to BNDS.  */

static void
bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y,
				    bounds *bnds)
{
  int rel = mpz_cmp (x, y);
  bool may_wrap = !nowrap_type_p (type);
  mpz_t m;

  /* If X == Y, then the expressions are always equal.
     If X > Y, there are the following possibilities:
       a) neither of var + X and var + Y overflow or underflow, or both of
	  them do.  Then their difference is X - Y.
       b) var + X overflows, and var + Y does not.  Then the values of the
	  expressions are var + X - M and var + Y, where M is the range of
	  the type, and their difference is X - Y - M.
       c) var + Y underflows and var + X does not.  Then var + Y is
	  computed as var + Y + M, and their difference is again
	  X - Y - M.
     Therefore, if the arithmetic in TYPE does not overflow, the bounds
     are (X - Y, X - Y); otherwise they are (X - Y - M, X - Y).
     Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
     (X - Y, X - Y + M).  */

  if (rel == 0)
    {
      mpz_set_ui (bnds->below, 0);
      mpz_set_ui (bnds->up, 0);
      return;
    }

  mpz_init (m);
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED);
  mpz_add_ui (m, m, 1);
  mpz_sub (bnds->up, x, y);
  mpz_set (bnds->below, bnds->up);

  if (may_wrap)
    {
      if (rel > 0)
	mpz_sub (bnds->below, bnds->below, m);
      else
	mpz_add (bnds->up, bnds->up, m);
    }

  mpz_clear (m);
}

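/* A standalone sketch (not built) checking the wrapping case above
   exhaustively for a hypothetical 8-bit wrapping type: with X = 10 and
   Y = 3, the mathematical difference of the wrapped values of VAR + X
   and VAR + Y is either X - Y = 7 or X - Y - M = 7 - 256, exactly the
   bounds (X - Y - M, X - Y) computed above.  */
#if 0
static void
demo_offsetted_base_bounds (void)
{
  for (int var = 0; var < 256; var++)
    {
      int a = (var + 10) & 0xff;	/* VAR + X in the 8-bit type */
      int b = (var + 3) & 0xff;		/* VAR + Y in the 8-bit type */
      gcc_assert (a - b == 7 || a - b == 7 - 256);
    }
}
#endif
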
/* From condition C0 CMP C1 derives information regarding the
   difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
   and stores it to BNDS.  */

static void
refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
			   tree vary, mpz_t offy,
			   tree c0, enum tree_code cmp, tree c1,
			   bounds *bnds)
{
  tree varc0, varc1, tmp, ctype;
  mpz_t offc0, offc1, loffx, loffy, bnd;
  bool lbound = false;
  bool no_wrap = nowrap_type_p (type);
  bool x_ok, y_ok;

  switch (cmp)
    {
    case LT_EXPR:
    case LE_EXPR:
    case GT_EXPR:
    case GE_EXPR:
      STRIP_SIGN_NOPS (c0);
      STRIP_SIGN_NOPS (c1);
      ctype = TREE_TYPE (c0);
      if (!useless_type_conversion_p (ctype, type))
	return;

      break;

    case EQ_EXPR:
      /* We could derive quite precise information from EQ_EXPR; however,
	 such a guard is unlikely to appear, so we do not bother with
	 handling it.  */
      return;

    case NE_EXPR:
      /* NE_EXPR comparisons do not contain much useful information,
	 except for the special case of comparing with the bounds of
	 the type.  */
      if (TREE_CODE (c1) != INTEGER_CST
	  || !INTEGRAL_TYPE_P (type))
	return;

      /* Ensure that the condition speaks about an expression in the same
	 type as X and Y.  */
      ctype = TREE_TYPE (c0);
      if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
	return;
      c0 = fold_convert (type, c0);
      c1 = fold_convert (type, c1);

      if (TYPE_MIN_VALUE (type)
	  && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
	{
	  cmp = GT_EXPR;
	  break;
	}
      if (TYPE_MAX_VALUE (type)
	  && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
	{
	  cmp = LT_EXPR;
	  break;
	}

      return;

    default:
      return;
    }

  mpz_init (offc0);
  mpz_init (offc1);
  split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
  split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);

  /* We are only interested in comparisons of expressions based on VARX and
     VARY.  TODO -- we might also be able to derive some bounds from
     expressions containing just one of the variables.  */

  if (operand_equal_p (varx, varc1, 0))
    {
      tmp = varc0; varc0 = varc1; varc1 = tmp;
      mpz_swap (offc0, offc1);
      cmp = swap_tree_comparison (cmp);
    }

  if (!operand_equal_p (varx, varc0, 0)
      || !operand_equal_p (vary, varc1, 0))
    goto end;

  mpz_init_set (loffx, offx);
  mpz_init_set (loffy, offy);

  if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      tmp = varx; varx = vary; vary = tmp;
      mpz_swap (offc0, offc1);
      mpz_swap (loffx, loffy);
      cmp = swap_tree_comparison (cmp);
      lbound = true;
    }

  /* If there is no overflow, the condition implies that

     (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).

     The overflows and underflows may complicate things a bit; each
     overflow decreases the appropriate offset by M, and underflow
     increases it by M.  The above inequality would not necessarily be
     true if

     -- VARX + OFFX underflows and VARX + OFFC0 does not, or
	VARX + OFFC0 overflows, but VARX + OFFX does not.
	This may only happen if OFFX < OFFC0.
     -- VARY + OFFY overflows and VARY + OFFC1 does not, or
	VARY + OFFC1 underflows and VARY + OFFY does not.
	This may only happen if OFFY > OFFC1.  */

  if (no_wrap)
    {
      x_ok = true;
      y_ok = true;
    }
  else
    {
      x_ok = (integer_zerop (varx)
	      || mpz_cmp (loffx, offc0) >= 0);
      y_ok = (integer_zerop (vary)
	      || mpz_cmp (loffy, offc1) <= 0);
    }

  if (x_ok && y_ok)
    {
      mpz_init (bnd);
      mpz_sub (bnd, loffx, loffy);
      mpz_add (bnd, bnd, offc1);
      mpz_sub (bnd, bnd, offc0);

      if (cmp == LT_EXPR)
	mpz_sub_ui (bnd, bnd, 1);

      if (lbound)
	{
	  mpz_neg (bnd, bnd);
	  if (mpz_cmp (bnds->below, bnd) < 0)
	    mpz_set (bnds->below, bnd);
	}
      else
	{
	  if (mpz_cmp (bnd, bnds->up) < 0)
	    mpz_set (bnds->up, bnd);
	}
      mpz_clear (bnd);
    }

  mpz_clear (loffx);
  mpz_clear (loffy);
end:
  mpz_clear (offc0);
  mpz_clear (offc1);
}

/* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
   The subtraction is considered to be performed in arbitrary precision,
   without overflows.

   We do not attempt to be too clever regarding the value ranges of X and
   Y; most of the time, they are just integers or SSA names offset by an
   integer.  However, we try to use the information contained in the
   comparisons before the loop (usually created by loop header copying).  */

static void
bound_difference (struct loop *loop, tree x, tree y, bounds *bnds)
{
  tree type = TREE_TYPE (x);
  tree varx, vary;
  mpz_t offx, offy;
  mpz_t minx, maxx, miny, maxy;
  int cnt = 0;
  edge e;
  basic_block bb;
  tree c0, c1;
  gimple cond;
  enum tree_code cmp;

  /* Get rid of unnecessary casts, but preserve the value of
     the expressions.  */
  STRIP_SIGN_NOPS (x);
  STRIP_SIGN_NOPS (y);

  mpz_init (bnds->below);
  mpz_init (bnds->up);
  mpz_init (offx);
  mpz_init (offy);
  split_to_var_and_offset (x, &varx, offx);
  split_to_var_and_offset (y, &vary, offy);

  if (!integer_zerop (varx)
      && operand_equal_p (varx, vary, 0))
    {
      /* Special case VARX == VARY -- we just need to compare the
	 offsets.  Matters are a bit more complicated when the
	 addition of the offsets may wrap.  */
      bound_difference_of_offsetted_base (type, offx, offy, bnds);
    }
  else
    {
      /* Otherwise, use the value ranges to determine the initial
	 estimates on below and up.  */
      mpz_init (minx);
      mpz_init (maxx);
      mpz_init (miny);
      mpz_init (maxy);
      determine_value_range (loop, type, varx, offx, minx, maxx);
      determine_value_range (loop, type, vary, offy, miny, maxy);

      mpz_sub (bnds->below, minx, maxy);
      mpz_sub (bnds->up, maxx, miny);
      mpz_clear (minx);
      mpz_clear (maxx);
      mpz_clear (miny);
      mpz_clear (maxy);
    }

  /* If both X and Y are constants, we cannot get any more precise.  */
  if (integer_zerop (varx) && integer_zerop (vary))
    goto end;

  /* Now walk the dominators of the loop header and use the entry
     guards to refine the estimates.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      cond = last_stmt (e->src);
      c0 = gimple_cond_lhs (cond);
      cmp = gimple_cond_code (cond);
      c1 = gimple_cond_rhs (cond);

      if (e->flags & EDGE_FALSE_VALUE)
	cmp = invert_tree_comparison (cmp, false);

      refine_bounds_using_guard (type, varx, offx, vary, offy,
				 c0, cmp, c1, bnds);
      ++cnt;
    }

end:
  mpz_clear (offx);
  mpz_clear (offy);
}

/* Update the bounds in BNDS that restrict the value of X to the bounds
   that restrict the value of X + DELTA.  X can be obtained as a
   difference of two values in TYPE.  */

static void
bounds_add (bounds *bnds, const widest_int &delta, tree type)
{
  mpz_t mdelta, max;

  mpz_init (mdelta);
  wi::to_mpz (delta, mdelta, SIGNED);

  mpz_init (max);
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);

  mpz_add (bnds->up, bnds->up, mdelta);
  mpz_add (bnds->below, bnds->below, mdelta);

  if (mpz_cmp (bnds->up, max) > 0)
    mpz_set (bnds->up, max);

  mpz_neg (max, max);
  if (mpz_cmp (bnds->below, max) < 0)
    mpz_set (bnds->below, max);

  mpz_clear (mdelta);
  mpz_clear (max);
}

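/* Sketch (not built, hypothetical numbers) of the clamping above: X is
   a difference of two 8-bit values, so it always lies in [-255, 255];
   shifting bounds [-3, 250] by DELTA = 20 gives [17, 270], which is
   clamped to [17, 255].  */
#if 0
static void
demo_bounds_add_clamp (void)
{
  int below = -3, up = 250, delta = 20;
  int max = 255;			/* 2^8 - 1 */

  below += delta;
  up += delta;
  if (up > max)
    up = max;				/* up becomes 255 */
  if (below < -max)
    below = -max;			/* below stays 17 */
}
#endif
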
/* Update the bounds in BNDS that restrict the value of X to the bounds
   that restrict the value of -X.  */

static void
bounds_negate (bounds *bnds)
{
  mpz_t tmp;

  mpz_init_set (tmp, bnds->up);
  mpz_neg (bnds->up, bnds->below);
  mpz_neg (bnds->below, tmp);
  mpz_clear (tmp);
}

/* Returns inverse of X modulo 2^s, where MASK = 2^s-1.  */

static tree
inverse (tree x, tree mask)
{
  tree type = TREE_TYPE (x);
  tree rslt;
  unsigned ctr = tree_floor_log2 (mask);

  if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT ix;
      unsigned HOST_WIDE_INT imask;
      unsigned HOST_WIDE_INT irslt = 1;

      gcc_assert (cst_and_fits_in_hwi (x));
      gcc_assert (cst_and_fits_in_hwi (mask));

      ix = int_cst_value (x);
      imask = int_cst_value (mask);

      for (; ctr; ctr--)
	{
	  irslt *= ix;
	  ix *= ix;
	}
      irslt &= imask;

      rslt = build_int_cst_type (type, irslt);
    }
  else
    {
      rslt = build_int_cst (type, 1);
      for (; ctr; ctr--)
	{
	  rslt = int_const_binop (MULT_EXPR, rslt, x);
	  x = int_const_binop (MULT_EXPR, x, x);
	}
      rslt = int_const_binop (BIT_AND_EXPR, rslt, mask);
    }

  return rslt;
}

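/* The multiply-and-square loops above compute X ** (2^ctr - 1), which
   for odd X is the inverse of X modulo 2^s, because the multiplicative
   order of X divides 2^(s-2).  A standalone sketch (not built) of the
   same trick for a hypothetical fixed 32-bit mask, where
   tree_floor_log2 of the all-ones mask is 31:  */
#if 0
static unsigned int
demo_inverse_mod_2_32 (unsigned int x)
{
  unsigned int rslt = 1;

  /* Invariant after k steps: rslt = x0 ** (2^k - 1), x = x0 ** (2^k).  */
  for (unsigned int ctr = 31; ctr; ctr--)
    {
      rslt *= x;
      x *= x;
    }
  return rslt;		/* rslt * x0 == 1 (mod 2^32) for odd x0.  */
}
#endif
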
/* Derives the upper bound BND on the number of executions of loop with exit
   condition S * i <> C.  If NO_OVERFLOW is true, then the control variable of
   the loop does not overflow.  EXIT_MUST_BE_TAKEN is true if we are guaranteed
   that the loop ends through this exit, i.e., the induction variable ever
   reaches the value of C.

   The value C is equal to final - base, where final and base are the final and
   initial value of the actual induction variable in the analysed loop.  BNDS
   bounds the value of this difference when computed in signed type with
   unbounded range, while the computation of C is performed in an unsigned
   type with the range matching the range of the type of the induction variable.
   In particular, BNDS.up contains an upper bound on C in the following cases:
   -- if the iv must reach its final value without overflow, i.e., if
      NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
   -- if final >= base, which we know to hold when BNDS.below >= 0.  */

static void
number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
			     bounds *bnds, bool exit_must_be_taken)
{
  widest_int max;
  mpz_t d;
  tree type = TREE_TYPE (c);
  bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
		       || mpz_sgn (bnds->below) >= 0);

  if (integer_onep (s)
      || (TREE_CODE (c) == INTEGER_CST
	  && TREE_CODE (s) == INTEGER_CST
	  && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0)
      || (TYPE_OVERFLOW_UNDEFINED (type)
	  && multiple_of_p (type, c, s)))
    {
      /* If C is an exact multiple of S, then its value will be reached before
	 the induction variable overflows (unless the loop is exited in some
	 other way before).  Note that the actual induction variable in the
	 loop (which ranges from base to final instead of from 0 to C) may
	 overflow, in which case BNDS.up will not be giving a correct upper
	 bound on C; thus, BNDS_U_VALID had to be computed in advance.  */
      no_overflow = true;
      exit_must_be_taken = true;
    }

  /* If the induction variable can overflow, the number of iterations is at
     most the period of the control variable (or infinite, but in that case
     the whole # of iterations analysis will fail).  */
  if (!no_overflow)
    {
      max = wi::mask <widest_int> (TYPE_PRECISION (type) - wi::ctz (s), false);
      wi::to_mpz (max, bnd, UNSIGNED);
      return;
    }

  /* Now we know that the induction variable does not overflow, so the loop
     iterates at most (range of type / S) times.  */
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);

  /* If the induction variable is guaranteed to reach the value of C before
     overflow, ... */
  if (exit_must_be_taken)
    {
      /* ... then we can strengthen this to C / S, and possibly we can use
	 the upper bound on C given by BNDS.  */
      if (TREE_CODE (c) == INTEGER_CST)
	wi::to_mpz (c, bnd, UNSIGNED);
      else if (bnds_u_valid)
	mpz_set (bnd, bnds->up);
    }

  mpz_init (d);
  wi::to_mpz (s, d, UNSIGNED);
  mpz_fdiv_q (bnd, bnd, d);
  mpz_clear (d);
}

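/* Sketch (not built) of the may-overflow fallback above: in a
   hypothetical 8-bit type with step S = 6, the sequence S * i mod 256
   has period 2^(8 - ctz (6)) = 128, so any reachable value C is hit
   within 127 iterations, whatever the starting point.  */
#if 0
static unsigned int
demo_ne_period_bound (void)
{
  unsigned int s = 6, prec = 8, ctz = 0;

  /* Count trailing zero bits of the step.  */
  while (!(s & 1))
    {
      s >>= 1;
      ctz++;
    }
  return (1u << (prec - ctz)) - 1;	/* 127 */
}
#endif
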
/* Determines number of iterations of loop whose ending condition
   is IV <> FINAL.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that the exit must be taken eventually, i.e., that the IV
   ever reaches the value FINAL (we derived this earlier, and possibly set
   NITER->assumptions to make sure this is the case).  BNDS contains the
   bounds on the difference FINAL - IV->base.  */

static bool
number_of_iterations_ne (tree type, affine_iv *iv, tree final,
			 struct tree_niter_desc *niter, bool exit_must_be_taken,
			 bounds *bnds)
{
  tree niter_type = unsigned_type_for (type);
  tree s, c, d, bits, assumption, tmp, bound;
  mpz_t max;

  niter->control = *iv;
  niter->bound = final;
  niter->cmp = NE_EXPR;

  /* Rearrange the terms so that we get inequality S * i <> C, with S
     positive.  Also cast everything to the unsigned type.  If IV does
     not overflow, BNDS bounds the value of C.  Also, this is the
     case if the computation |FINAL - IV->base| does not overflow, i.e.,
     if BNDS->below in the result is nonnegative.  */
  if (tree_int_cst_sign_bit (iv->step))
    {
      s = fold_convert (niter_type,
			fold_build1 (NEGATE_EXPR, type, iv->step));
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv->base),
		       fold_convert (niter_type, final));
      bounds_negate (bnds);
    }
  else
    {
      s = fold_convert (niter_type, iv->step);
      c = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, final),
		       fold_convert (niter_type, iv->base));
    }

  mpz_init (max);
  number_of_iterations_ne_max (max, iv->no_overflow, c, s, bnds,
			       exit_must_be_taken);
  niter->max = widest_int::from (wi::from_mpz (niter_type, max, false),
				 TYPE_SIGN (niter_type));
  mpz_clear (max);

  /* First the trivial cases -- when the step is 1.  */
  if (integer_onep (s))
    {
      niter->niter = c;
      return true;
    }

  /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
     is infinite.  Otherwise, the number of iterations is
     (inverse(s/d) * (c/d)) mod (size of mode/d).  */
  bits = num_ending_zeros (s);
  bound = build_low_bits_mask (niter_type,
			       (TYPE_PRECISION (niter_type)
				- tree_to_uhwi (bits)));

  d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
			       build_int_cst (niter_type, 1), bits);
  s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);

  if (!exit_must_be_taken)
    {
      /* If we cannot assume that the exit is taken eventually, record the
	 assumptions for divisibility of c.  */
      assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
      assumption = fold_build2 (EQ_EXPR, boolean_type_node,
				assumption, build_int_cst (niter_type, 0));
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
  tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
  niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
  return true;
}

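/* A standalone sketch (not built) of the closed form above on a
   hypothetical 8-bit loop with exit test 6 * i != 250: d =
   2^num_ending_zeros (6) = 2, bound = 2^7 - 1 = 127, and the inverse
   of 3 modulo 2^7 is 43, so niter = ((250 / 2) * 43) & 127 = 127,
   which a brute-force count confirms.  */
#if 0
static unsigned int
demo_niter_ne_8bit (void)
{
  unsigned int n = 0;
  unsigned char iv = 0;

  while ((unsigned char) (6 * iv) != 250)
    {
      iv++;
      n++;
    }
  gcc_assert (n == ((125 * 43) & 127));	/* both sides are 127 */
  return n;
}
#endif
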
/* Checks whether we can determine the final value of the control variable
   of the loop with ending condition IV0 < IV1 (computed in TYPE).
   DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
   of the step.  The assumptions necessary to ensure that the computation
   of the final value does not overflow are recorded in NITER.  If we
   find the final value, we adjust DELTA and return TRUE.  Otherwise
   we return false.  BNDS bounds the value of IV1->base - IV0->base,
   and will be updated by the same amount as DELTA.  EXIT_MUST_BE_TAKEN is
   true if we know that the exit must be taken eventually.  */

static bool
number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
			       struct tree_niter_desc *niter,
			       tree *delta, tree step,
			       bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = TREE_TYPE (step);
  tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
  tree tmod;
  mpz_t mmod;
  tree assumption = boolean_true_node, bound, noloop;
  bool ret = false, fv_comp_no_overflow;
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  if (TREE_CODE (mod) != INTEGER_CST)
    return false;
  if (integer_nonzerop (mod))
    mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
  tmod = fold_convert (type1, mod);

  mpz_init (mmod);
  wi::to_mpz (mod, mmod, UNSIGNED);
  mpz_neg (mmod, mmod);

  /* If the induction variable does not overflow and the exit is taken,
     then the computation of the final value does not overflow.  This is
     also obviously the case if the new final value is equal to the
     current one.  Finally, we postulate this for pointer type variables,
     as the code cannot rely on the object to which the pointer points
     being placed at the end of the address space (and more pragmatically,
     TYPE_{MIN,MAX}_VALUE is not defined for pointers).  */
  if (integer_zerop (mod) || POINTER_TYPE_P (type))
    fv_comp_no_overflow = true;
  else if (!exit_must_be_taken)
    fv_comp_no_overflow = false;
  else
    fv_comp_no_overflow
      = ((iv0->no_overflow && integer_nonzerop (iv0->step))
	 || (iv1->no_overflow && integer_nonzerop (iv1->step)));

  if (integer_nonzerop (iv0->step))
    {
      /* The final value of the iv is iv1->base + MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base <= iv1->base + MOD.  */
      if (!fv_comp_no_overflow)
	{
	  bound = fold_build2 (MINUS_EXPR, type1,
			       TYPE_MAX_VALUE (type1), tmod);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	  if (integer_zerop (assumption))
	    goto end;
	}
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build_pointer_plus (iv1->base, tmod));
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      iv0->base,
			      fold_build2 (PLUS_EXPR, type1,
					   iv1->base, tmod));
    }
  else
    {
      /* The final value of the iv is iv0->base - MOD, assuming that this
	 computation does not overflow, and that
	 iv0->base - MOD <= iv1->base.  */
      if (!fv_comp_no_overflow)
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type1), tmod);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	  if (integer_zerop (assumption))
	    goto end;
	}
      if (mpz_cmp (mmod, bnds->below) < 0)
	noloop = boolean_false_node;
      else if (POINTER_TYPE_P (type))
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build_pointer_plus (iv0->base,
						       fold_build1 (NEGATE_EXPR,
								    type1, tmod)),
			      iv1->base);
      else
	noloop = fold_build2 (GT_EXPR, boolean_type_node,
			      fold_build2 (MINUS_EXPR, type1,
					   iv0->base, tmod),
			      iv1->base);
    }

  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions,
				      assumption);
  if (!integer_zerop (noloop))
    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				      niter->may_be_zero,
				      noloop);
  bounds_add (bnds, wi::to_widest (mod), type);
  *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);

  ret = true;
end:
  mpz_clear (mmod);
  return ret;
}

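/* Sketch (not built, hypothetical numbers) of the transformation above:
   for an 8-bit iv0 = {0, +, 4} and invariant iv1 = 10, DELTA = 10 and
   MOD = step - DELTA % step = 2, so the final value is bumped to 12 and
   the exit test i < 10 becomes i != 12, giving 12 / 4 = 3 iterations.  */
#if 0
static unsigned int
demo_lt_to_ne (void)
{
  unsigned int n = 0;

  for (unsigned char i = 0; i < 10; i += 4)
    n++;
  gcc_assert (n == 3);
  return n;
}
#endif
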
/* Add assertions to NITER that ensure that the control variable of the loop
   with ending condition IV0 < IV1 does not overflow.  Types of IV0 and IV1
   are TYPE.  Returns false if we can prove that there is an overflow, true
   otherwise.  STEP is the absolute value of the step.  */

static bool
assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		       struct tree_niter_desc *niter, tree step)
{
  tree bound, d, assumption, diff;
  tree niter_type = TREE_TYPE (step);

  if (integer_nonzerop (iv0->step))
    {
      /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
      if (iv0->no_overflow)
	return true;

      /* If iv0->base is a constant, we can determine the last value before
	 overflow precisely; otherwise we conservatively assume
	 MAX - STEP + 1.  */

      if (TREE_CODE (iv0->base) == INTEGER_CST)
	{
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, TYPE_MAX_VALUE (type)),
			   fold_convert (niter_type, iv0->base));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      bound = fold_build2 (MINUS_EXPR, type,
			   TYPE_MAX_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (LE_EXPR, boolean_type_node,
				iv1->base, bound);
    }
  else
    {
      /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
      if (iv1->no_overflow)
	return true;

      if (TREE_CODE (iv1->base) == INTEGER_CST)
	{
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, iv1->base),
			   fold_convert (niter_type, TYPE_MIN_VALUE (type)));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      bound = fold_build2 (PLUS_EXPR, type,
			   TYPE_MIN_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (GE_EXPR, boolean_type_node,
				iv0->base, bound);
    }

  if (integer_zerop (assumption))
    return false;
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);

  iv0->no_overflow = true;
  iv1->no_overflow = true;
  return true;
}

/* Add an assumption to NITER that a loop whose ending condition
   is IV0 < IV1 rolls.  TYPE is the type of the control iv.  BNDS
   bounds the value of IV1->base - IV0->base.  */

static void
assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		      struct tree_niter_desc *niter, bounds *bnds)
{
  tree assumption = boolean_true_node, bound, diff;
  tree mbz, mbzl, mbzr, type1;
  bool rolls_p, no_overflow_p;
  widest_int dstep;
  mpz_t mstep, max;

  /* We are going to compute the number of iterations as
     (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
     variant of TYPE.  This formula only works if

     -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1

     (where MAX is the maximum value of the unsigned variant of TYPE, and
     the computations in this formula are performed in full precision,
     i.e., without overflows).

     Usually, for loops with exit condition iv0->base + step * i < iv1->base,
     we have a condition of the form iv0->base - step < iv1->base before the
     loop, and for loops iv0->base < iv1->base - step * i the condition
     iv0->base < iv1->base + step, due to loop header copying, which enables
     us to prove the lower bound.

     The upper bound is more complicated.  Unless the expressions for initial
     and final value themselves contain enough information, we usually cannot
     derive it from the context.  */

  /* First check whether the answer does not follow from the bounds we
     gathered before.  */
  if (integer_nonzerop (iv0->step))
    dstep = wi::to_widest (iv0->step);
  else
    {
      dstep = wi::sext (wi::to_widest (iv1->step), TYPE_PRECISION (type));
      dstep = -dstep;
    }

  mpz_init (mstep);
  wi::to_mpz (dstep, mstep, UNSIGNED);
  mpz_neg (mstep, mstep);
  mpz_add_ui (mstep, mstep, 1);

  rolls_p = mpz_cmp (mstep, bnds->below) <= 0;

  mpz_init (max);
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
  mpz_add (max, max, mstep);
  no_overflow_p = (mpz_cmp (bnds->up, max) <= 0
		   /* For pointers, only values lying inside a single object
		      can be compared or manipulated by pointer arithmetic.
		      GCC in general does not allow or handle objects larger
		      than half of the address space, hence the upper bound
		      is satisfied for pointers.  */
		   || POINTER_TYPE_P (type));
  mpz_clear (mstep);
  mpz_clear (max);

  if (rolls_p && no_overflow_p)
    return;

  type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Now the hard part; we must formulate the assumption(s) as expressions, and
     we must be careful not to introduce overflow.  */

  if (integer_nonzerop (iv0->step))
    {
      diff = fold_build2 (MINUS_EXPR, type1,
			  iv0->step, build_int_cst (type1, 1));

      /* We need to know that iv0->base >= MIN + iv0->step - 1.  Since
	 the 0 address never belongs to any object, we can assume this for
	 pointers.  */
      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type), diff);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	}

      /* And then we can compute iv0->base - diff, and compare it with
	 iv1->base.  */
      mbzl = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv0->base), diff);
      mbzr = fold_convert (type1, iv1->base);
    }
  else
    {
      diff = fold_build2 (PLUS_EXPR, type1,
			  iv1->step, build_int_cst (type1, 1));

      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MAX_VALUE (type), diff);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	}

      mbzl = fold_convert (type1, iv0->base);
      mbzr = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv1->base), diff);
    }

  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);
  if (!rolls_p)
    {
      mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
      niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
					niter->may_be_zero, mbz);
    }
}

/* Determines number of iterations of loop whose ending condition
   is IV0 < IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  BNDS bounds the difference
   IV1->base - IV0->base.  EXIT_MUST_BE_TAKEN is true if we know
   that the exit must be taken eventually.  */

static bool
number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
			 struct tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = unsigned_type_for (type);
  tree delta, step, s;
  mpz_t mstep, tmp;

  if (integer_nonzerop (iv0->step))
    {
      niter->control = *iv0;
      niter->cmp = LT_EXPR;
      niter->bound = iv1->base;
    }
  else
    {
      niter->control = *iv1;
      niter->cmp = GT_EXPR;
      niter->bound = iv0->base;
    }

  delta = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv1->base),
		       fold_convert (niter_type, iv0->base));

  /* First handle the special case that the step is +-1.  */
  if ((integer_onep (iv0->step) && integer_zerop (iv1->step))
      || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step)))
    {
      /* for (i = iv0->base; i < iv1->base; i++)

	 or

	 for (i = iv1->base; i > iv0->base; i--).

	 In both cases # of iterations is iv1->base - iv0->base, assuming that
	 iv1->base >= iv0->base.

	 First try to derive a lower bound on the value of
	 iv1->base - iv0->base, computed in full precision.  If the difference
	 is nonnegative, we are done, otherwise we must record the
	 condition.  */

      if (mpz_sgn (bnds->below) < 0)
	niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
					  iv1->base, iv0->base);
      niter->niter = delta;
      niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
				     TYPE_SIGN (niter_type));
      niter->control.no_overflow = true;
      return true;
    }

  if (integer_nonzerop (iv0->step))
    step = fold_convert (niter_type, iv0->step);
  else
    step = fold_convert (niter_type,
			 fold_build1 (NEGATE_EXPR, type, iv1->step));

  /* If we can determine the final value of the control iv exactly, we can
     transform the condition to != comparison.  In particular, this will be
     the case if DELTA is constant.  */
  if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, &delta, step,
				     exit_must_be_taken, bnds))
    {
      affine_iv zps;

      zps.base = build_int_cst (niter_type, 0);
      zps.step = step;
      /* number_of_iterations_lt_to_ne will add assumptions that ensure that
	 zps does not overflow.  */
      zps.no_overflow = true;

      return number_of_iterations_ne (type, &zps, delta, niter, true, bnds);
    }

  /* Make sure that the control iv does not overflow.  */
  if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
    return false;

  /* We determine the number of iterations as (delta + step - 1) / step.  For
     this to work, we must know that iv1->base >= iv0->base - step + 1,
     otherwise the loop does not roll.  */
  assert_loop_rolls_lt (type, iv0, iv1, niter, bnds);

  s = fold_build2 (MINUS_EXPR, niter_type,
		   step, build_int_cst (niter_type, 1));
  delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);

  mpz_init (mstep);
  mpz_init (tmp);
  wi::to_mpz (step, mstep, UNSIGNED);
  mpz_add (tmp, bnds->up, mstep);
  mpz_sub_ui (tmp, tmp, 1);
  mpz_fdiv_q (tmp, tmp, mstep);
  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
				 TYPE_SIGN (niter_type));
  mpz_clear (mstep);
  mpz_clear (tmp);

  return true;
}

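/* Sketch (not built) of the generic formula above on a hypothetical
   8-bit loop for (i = 3; i < 20; i += 5): delta = 17, step = 5, so
   niter = (delta + step - 1) / step = 21 / 5 = 4.  */
#if 0
static unsigned int
demo_niter_lt (void)
{
  unsigned int n = 0;

  for (unsigned char i = 3; i < 20; i += 5)
    n++;
  gcc_assert (n == (17 + 5 - 1) / 5);	/* 4 */
  return n;
}
#endif
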
/* Determines number of iterations of loop whose ending condition
   is IV0 <= IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that this condition must eventually become false (we derived this
   earlier, and possibly set NITER->assumptions to make sure this
   is the case).  BNDS bounds the difference IV1->base - IV0->base.  */

static bool
number_of_iterations_le (tree type, affine_iv *iv0, affine_iv *iv1,
			 struct tree_niter_desc *niter, bool exit_must_be_taken,
			 bounds *bnds)
{
  tree assumption;
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Say that IV0 is the control variable.  Then IV0 <= IV1 iff
     IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
     value of the type.  This we must know anyway, since if it is
     equal to this value, the loop rolls forever.  We do not check
     this condition for pointer type ivs, as the code cannot rely on
     the object to which the pointer points being placed at the end of
     the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
     not defined for pointers).  */

  if (!exit_must_be_taken && !POINTER_TYPE_P (type))
    {
      if (integer_nonzerop (iv0->step))
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv1->base, TYPE_MAX_VALUE (type));
      else
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv0->base, TYPE_MIN_VALUE (type));

      if (integer_zerop (assumption))
	return false;
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  if (integer_nonzerop (iv0->step))
    {
      if (POINTER_TYPE_P (type))
	iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1);
      else
	iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base,
				 build_int_cst (type1, 1));
    }
  else if (POINTER_TYPE_P (type))
    iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1);
  else
    iv0->base = fold_build2 (MINUS_EXPR, type1,
			     iv0->base, build_int_cst (type1, 1));

  bounds_add (bnds, 1, type1);

  return number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken,
				  bnds);
}

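/* Sketch (not built) of the <= handling above: i <= 9 is rewritten as
   i < 9 + 1 once we know 9 is not the maximal value of the type, so a
   hypothetical for (uint8_t i = 0; i <= 9; i++) runs (9 + 1) - 0 = 10
   times.  */
#if 0
static unsigned int
demo_niter_le (void)
{
  unsigned int n = 0;

  for (unsigned char i = 0; i <= 9; i++)
    n++;
  gcc_assert (n == 10);
  return n;
}
#endif
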
/* Dumps description of affine induction variable IV to FILE.  */

static void
dump_affine_iv (FILE *file, affine_iv *iv)
{
  if (!integer_zerop (iv->step))
    fprintf (file, "[");

  /* Print to FILE, not to dump_file, so callers can direct the dump
     anywhere.  */
  print_generic_expr (file, iv->base, TDF_SLIM);

  if (!integer_zerop (iv->step))
    {
      fprintf (file, ", + , ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "]%s", iv->no_overflow ? "(no_overflow)" : "");
    }
}

/* Determine the number of iterations according to condition (for staying
   inside loop) which compares two induction variables using comparison
   operator CODE.  The induction variable on left side of the comparison
   is IV0, the right-hand side is IV1.  Both induction variables must have
   type TYPE, which must be an integer or pointer type.  The steps of the
   ivs must be constants (or NULL_TREE, which is interpreted as constant zero).

   LOOP is the loop whose number of iterations we are determining.

   ONLY_EXIT is true if we are sure this is the only way the loop could be
   exited (including possibly non-returning function calls, exceptions, etc.)
   -- in this case we can use the information whether the control induction
   variables can overflow or not in a more efficient way.

   If EVERY_ITERATION is true, we know the test is executed on every iteration.

   The results (number of iterations and assumptions as described in
   comments at struct tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
   Returns false if it fails to determine number of iterations, true if it
   was determined (possibly with some assumptions).  */

static bool
number_of_iterations_cond (struct loop *loop,
			   tree type, affine_iv *iv0, enum tree_code code,
			   affine_iv *iv1, struct tree_niter_desc *niter,
			   bool only_exit, bool every_iteration)
{
  bool exit_must_be_taken = false, ret;
  bounds bnds;

  /* If the test is not executed every iteration, wrapping may make the
     test pass again.
     TODO: the overflow case can still be used as an unreliable estimate
     of the upper bound.  But we have no API to pass it down to number of
     iterations code and, at present, it will not use it anyway.  */
  if (!every_iteration
      && (!iv0->no_overflow || !iv1->no_overflow
	  || code == NE_EXPR || code == EQ_EXPR))
    return false;

  /* The meaning of these assumptions is this:
     if !assumptions
       then the rest of information does not have to be valid
     if may_be_zero then the loop does not roll, even if
       niter != 0.  */
  niter->assumptions = boolean_true_node;
  niter->may_be_zero = boolean_false_node;
  niter->niter = NULL_TREE;
  niter->max = 0;
  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
     the control variable is on lhs.  */
  if (code == GE_EXPR || code == GT_EXPR
      || (code == NE_EXPR && integer_zerop (iv0->step)))
    {
      SWAP (iv0, iv1);
      code = swap_tree_comparison (code);
    }

  if (POINTER_TYPE_P (type))
    {
      /* Comparison of pointers is undefined unless both iv0 and iv1 point
	 to the same object.  If they do, the control variable cannot wrap
	 (as wrap around the bounds of memory will never return a pointer
	 that would be guaranteed to point to the same object, even if we
	 avoid undefined behavior by casting to size_t and back).  */
      iv0->no_overflow = true;
      iv1->no_overflow = true;
    }

  /* If the control induction variable does not overflow and the only exit
     from the loop is the one that we analyze, we know it must be taken
     eventually.  */
  if (only_exit)
    {
      if (!integer_zerop (iv0->step) && iv0->no_overflow)
	exit_must_be_taken = true;
      else if (!integer_zerop (iv1->step) && iv1->no_overflow)
	exit_must_be_taken = true;
    }

  /* We can handle the case when neither of the sides of the comparison is
     invariant, provided that the test is NE_EXPR.  This rarely occurs in
     practice, but it is simple enough to manage.  */
  if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
      if (code != NE_EXPR)
	return false;

      iv0->step = fold_binary_to_constant (MINUS_EXPR, step_type,
					   iv0->step, iv1->step);
      iv0->no_overflow = false;
      iv1->step = build_int_cst (step_type, 0);
      iv1->no_overflow = true;
    }

  /* If the result of the comparison is a constant, the loop is weird.  More
     precise handling would be possible, but the situation is not common enough
     to waste time on it.  */
  if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
    return false;

  /* Ignore loops of while (i-- < 10) type.  */
  if (code != NE_EXPR)
    {
      if (iv0->step && tree_int_cst_sign_bit (iv0->step))
	return false;

      if (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))
	return false;
    }

  /* If the loop exits immediately, there is nothing to do.  */
  tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
  if (tem && integer_zerop (tem))
    {
      niter->niter = build_int_cst (unsigned_type_for (type), 0);
      niter->max = 0;
      return true;
    }

  /* OK, now we know we have a sensible loop.  Handle several cases, depending
     on what comparison operator is used.  */
  bound_difference (loop, iv1->base, iv0->base, &bnds);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Analyzing # of iterations of loop %d\n", loop->num);

      fprintf (dump_file, "  exit condition ");
      dump_affine_iv (dump_file, iv0);
      fprintf (dump_file, " %s ",
	       code == NE_EXPR ? "!="
	       : code == LT_EXPR ? "<"
	       : "<=");
      dump_affine_iv (dump_file, iv1);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "  bounds on difference of bases: ");
      mpz_out_str (dump_file, 10, bnds.below);
      fprintf (dump_file, " ... ");
      mpz_out_str (dump_file, 10, bnds.up);
      fprintf (dump_file, "\n");
    }

  switch (code)
    {
    case NE_EXPR:
      gcc_assert (integer_zerop (iv1->step));
      ret = number_of_iterations_ne (type, iv0, iv1->base, niter,
				     exit_must_be_taken, &bnds);
      break;

    case LT_EXPR:
      ret = number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken,
				     &bnds);
      break;

    case LE_EXPR:
      ret = number_of_iterations_le (type, iv0, iv1, niter, exit_must_be_taken,
				     &bnds);
      break;

    default:
      gcc_unreachable ();
    }

  mpz_clear (bnds.up);
  mpz_clear (bnds.below);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (ret)
	{
	  fprintf (dump_file, "  result:\n");
	  if (!integer_nonzerop (niter->assumptions))
	    {
	      fprintf (dump_file, "    under assumptions ");
	      print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }

	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "    zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "    # of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, ", bounded by ");
	  print_decu (niter->max, dump_file);
	  fprintf (dump_file, "\n");
	}
      else
	fprintf (dump_file, "  failed\n\n");
    }
  return ret;
}

/* Substitutes NEW_TREE for OLD in EXPR and folds the result.  */

static tree
simplify_replace_tree (tree expr, tree old, tree new_tree)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, se;

  if (!expr)
    return NULL_TREE;

  /* Do not bother to replace constants.  */
  if (CONSTANT_CLASS_P (old))
    return expr;

  if (expr == old
      || operand_equal_p (expr, old, 0))
    return unshare_expr (new_tree);

  if (!EXPR_P (expr))
    return expr;

  n = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < n; i++)
    {
      e = TREE_OPERAND (expr, i);
      se = simplify_replace_tree (e, old, new_tree);
      if (e == se)
	continue;

      if (!ret)
	ret = copy_node (expr);

      TREE_OPERAND (ret, i) = se;
    }

  return (ret ? fold (ret) : expr);
}

/* Expand definitions of ssa names in EXPR as long as they are simple
   enough, and return the new expression.  If STOP is specified, stop
   expanding if EXPR equals it.  */

tree
expand_simple_operations (tree expr, tree stop)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, ee, e1;
  enum tree_code code;
  gimple stmt;

  if (expr == NULL_TREE)
    return expr;

  if (is_gimple_min_invariant (expr))
    return expr;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  e = TREE_OPERAND (expr, i);
	  ee = expand_simple_operations (e, stop);
	  if (e == ee)
	    continue;

	  if (!ret)
	    ret = copy_node (expr);

	  TREE_OPERAND (ret, i) = ee;
	}

      if (!ret)
	return expr;

      fold_defer_overflow_warnings ();
      ret = fold (ret);
      fold_undefer_and_ignore_overflow_warnings ();
      return ret;
    }

  /* Stop if it's not an SSA name or it's the one we don't want to expand.  */
  if (TREE_CODE (expr) != SSA_NAME || expr == stop)
    return expr;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      basic_block src, dest;

      if (gimple_phi_num_args (stmt) != 1)
	return expr;
      e = PHI_ARG_DEF (stmt, 0);

      /* Avoid propagating through loop exit phi nodes, which
	 could break loop-closed SSA form restrictions.  */
      dest = gimple_bb (stmt);
      src = single_pred (dest);
      if (TREE_CODE (e) == SSA_NAME
	  && src->loop_father != dest->loop_father)
	return expr;

      return expand_simple_operations (e, stop);
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return expr;

  /* Avoid expanding to expressions that contain SSA names that need
     to take part in abnormal coalescing.  */
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
    if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
      return expr;

  e = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (is_gimple_min_invariant (e))
	return e;

      if (code == SSA_NAME)
	return expand_simple_operations (e, stop);

      return expr;
    }

  switch (code)
    {
    CASE_CONVERT:
      /* Casts are simple.  */
      ee = expand_simple_operations (e, stop);
      return fold_build1 (code, TREE_TYPE (expr), ee);

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
	  && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
	return expr;
      /* Fallthru.  */
    case POINTER_PLUS_EXPR:
      /* And increments and decrements by a constant are simple.  */
      e1 = gimple_assign_rhs2 (stmt);
      if (!is_gimple_min_invariant (e1))
	return expr;

      ee = expand_simple_operations (e, stop);
      return fold_build2 (code, TREE_TYPE (expr), ee, e1);

    default:
      return expr;
    }
}

/* Tries to simplify EXPR using the condition COND.  Returns the simplified
   expression (or EXPR unchanged, if no simplification was possible).  */

static tree
tree_simplify_using_condition_1 (tree cond, tree expr)
{
  bool changed;
  tree e, te, e0, e1, e2, notcond;
  enum tree_code code = TREE_CODE (expr);

  if (code == INTEGER_CST)
    return expr;

  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      changed = false;

      e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* In case COND is equality, we may be able to simplify EXPR by copy/constant
     propagation, and vice versa.  Fold does not handle this, since it is
     considered too expensive.  */
  if (TREE_CODE (cond) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (cond, 0);
      e1 = TREE_OPERAND (cond, 1);

      /* We know that e0 == e1.  Check whether we cannot simplify expr
	 using this fact.  */
      e = simplify_replace_tree (expr, e0, e1);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;

      e = simplify_replace_tree (expr, e1, e0);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;
    }
  if (TREE_CODE (expr) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true.  */
      e = simplify_replace_tree (cond, e0, e1);
      if (integer_zerop (e))
	return e;
      e = simplify_replace_tree (cond, e1, e0);
      if (integer_zerop (e))
	return e;
    }
  if (TREE_CODE (expr) == NE_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true.  */
      e = simplify_replace_tree (cond, e0, e1);
      if (integer_zerop (e))
	return boolean_true_node;
      e = simplify_replace_tree (cond, e1, e0);
      if (integer_zerop (e))
	return boolean_true_node;
    }

  te = expand_simple_operations (expr);

  /* Check whether COND ==> EXPR.  */
  notcond = invert_truthvalue (cond);
  e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, te);
  if (e && integer_nonzerop (e))
    return e;

  /* Check whether COND ==> not EXPR.  */
  e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, te);
  if (e && integer_zerop (e))
    return e;

  return expr;
}

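/* Sketch (not built) of the implication checks above: COND implies EXPR
   exactly when !COND || EXPR is a tautology, so e.g. an entry guard
   n > 10 lets an assumption n > 0 fold to true.  */
#if 0
static int
demo_cond_implies_expr (int n)
{
  int cond = n > 10, expr = n > 0;

  return !cond || expr;		/* 1 for every n */
}
#endif
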
/* Tries to simplify EXPR using the condition COND.  Returns the simplified
   expression (or EXPR unchanged, if no simplification was possible).
   Wrapper around tree_simplify_using_condition_1 that ensures that chains
   of simple operations in definitions of ssa names in COND are expanded,
   so that things like casts or incrementing the value of the bound before
   the loop do not cause us to fail.  */

static tree
tree_simplify_using_condition (tree cond, tree expr)
{
  cond = expand_simple_operations (cond);

  return tree_simplify_using_condition_1 (cond, expr);
}

/* Tries to simplify EXPR using the conditions on entry to LOOP.
   Returns the simplified expression (or EXPR unchanged, if no
   simplification was possible).  */

static tree
simplify_using_initial_conditions (struct loop *loop, tree expr)
{
  edge e;
  basic_block bb;
  gimple stmt;
  tree cond;
  int cnt = 0;

  if (TREE_CODE (expr) == INTEGER_CST)
    return expr;

  /* Limit walking the dominators to avoid quadratic behavior in
     the number of BBs times the number of loops in degenerate
     cases.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      stmt = last_stmt (e->src);
      cond = fold_build2 (gimple_cond_code (stmt),
			  boolean_type_node,
			  gimple_cond_lhs (stmt),
			  gimple_cond_rhs (stmt));
      if (e->flags & EDGE_FALSE_VALUE)
	cond = invert_truthvalue (cond);
      expr = tree_simplify_using_condition (cond, expr);
      ++cnt;
    }

  return expr;
}

/* Tries to simplify EXPR using the evolutions of the loop invariants
   in the superloops of LOOP.  Returns the simplified expression
   (or EXPR unchanged, if no simplification was possible).  */

static tree
simplify_using_outer_evolutions (struct loop *loop, tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  bool changed;
  tree e, e0, e1, e2;

  if (is_gimple_min_invariant (expr))
    return expr;

  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      changed = false;

      e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  e = instantiate_parameters (loop, expr);
  if (is_gimple_min_invariant (e))
    return e;

  return expr;
}

/* Returns true if EXIT is the only possible exit from LOOP.  */

bool
loop_only_exit_p (const struct loop *loop, const_edge exit)
{
  basic_block *body;
  gimple_stmt_iterator bsi;
  unsigned i;
  gimple call;

  if (exit != single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      for (bsi = gsi_start_bb (body[i]); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  call = gsi_stmt (bsi);
	  if (gimple_code (call) != GIMPLE_CALL)
	    continue;

	  if (gimple_has_side_effects (call))
	    {
	      free (body);
	      return false;
	    }
	}
    }

  free (body);
  return true;
}

1930 /* Stores a description of the number of iterations of LOOP derived from
1931 EXIT (an exit edge of the LOOP) in NITER. Returns true if some
1932 useful information could be derived (the fields of NITER then have the
1933 meaning described in the comments at the struct tree_niter_desc
1934 declaration), false otherwise. If WARN is true and
1935 -Wunsafe-loop-optimizations was given, warn if the optimizer is going to use
1936 potentially unsafe assumptions.
1937 When EVERY_ITERATION is true, only tests that are known to be executed
1938 every iteration are considered (i.e. only tests that alone bound the loop). */
1941 bool
1942 number_of_iterations_exit (struct loop *loop, edge exit,
1943 struct tree_niter_desc *niter,
1944 bool warn, bool every_iteration)
1946 gimple last;
1947 gcond *stmt;
1948 tree type;
1949 tree op0, op1;
1950 enum tree_code code;
1951 affine_iv iv0, iv1;
1952 bool safe;
1954 safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
1956 if (every_iteration && !safe)
1957 return false;
1959 niter->assumptions = boolean_false_node;
1960 niter->control.base = NULL_TREE;
1961 niter->control.step = NULL_TREE;
1962 niter->control.no_overflow = false;
1963 last = last_stmt (exit->src);
1964 if (!last)
1965 return false;
1966 stmt = dyn_cast <gcond *> (last);
1967 if (!stmt)
1968 return false;
1970 /* We want the condition for staying inside loop. */
1971 code = gimple_cond_code (stmt);
1972 if (exit->flags & EDGE_TRUE_VALUE)
1973 code = invert_tree_comparison (code, false);
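  /* E.g., if the exit edge is taken when "i >= n" is true, the
     condition for staying inside the loop is the inverted comparison
     "i < n".  */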
1975 switch (code)
1977 case GT_EXPR:
1978 case GE_EXPR:
1979 case LT_EXPR:
1980 case LE_EXPR:
1981 case NE_EXPR:
1982 break;
1984 default:
1985 return false;
1988 op0 = gimple_cond_lhs (stmt);
1989 op1 = gimple_cond_rhs (stmt);
1990 type = TREE_TYPE (op0);
1992 if (TREE_CODE (type) != INTEGER_TYPE
1993 && !POINTER_TYPE_P (type))
1994 return false;
1996 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, false))
1997 return false;
1998 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, false))
1999 return false;
2001 /* We don't want to see undefined signed overflow warnings while
2002 computing the number of iterations. */
2003 fold_defer_overflow_warnings ();
2005 iv0.base = expand_simple_operations (iv0.base);
2006 iv1.base = expand_simple_operations (iv1.base);
2007 if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
2008 loop_only_exit_p (loop, exit), safe))
2010 fold_undefer_and_ignore_overflow_warnings ();
2011 return false;
2014 if (optimize >= 3)
2016 niter->assumptions = simplify_using_outer_evolutions (loop,
2017 niter->assumptions);
2018 niter->may_be_zero = simplify_using_outer_evolutions (loop,
2019 niter->may_be_zero);
2020 niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
2023 niter->assumptions
2024 = simplify_using_initial_conditions (loop,
2025 niter->assumptions);
2026 niter->may_be_zero
2027 = simplify_using_initial_conditions (loop,
2028 niter->may_be_zero);
2030 fold_undefer_and_ignore_overflow_warnings ();
2032 /* If NITER has simplified into a constant, update MAX. */
2033 if (TREE_CODE (niter->niter) == INTEGER_CST)
2034 niter->max = wi::to_widest (niter->niter);
2036 if (integer_onep (niter->assumptions))
2037 return true;
2039 /* With -funsafe-loop-optimizations we assume that nothing bad can happen.
2040 But if we can prove that there is overflow or some other source of weird
2041 behavior, ignore the loop even with -funsafe-loop-optimizations. */
2042 if (integer_zerop (niter->assumptions) || !single_exit (loop))
2043 return false;
2045 if (flag_unsafe_loop_optimizations)
2046 niter->assumptions = boolean_true_node;
2048 if (warn)
2050 const char *wording;
2051 location_t loc = gimple_location (stmt);
2053 /* We can provide a more specific warning if one of the operands is
2054 constant and the other advances by +1 or -1. */
2055 if (!integer_zerop (iv1.step)
2056 ? (integer_zerop (iv0.step)
2057 && (integer_onep (iv1.step) || integer_all_onesp (iv1.step)))
2058 : (integer_onep (iv0.step) || integer_all_onesp (iv0.step)))
2059 wording =
2060 flag_unsafe_loop_optimizations
2061 ? N_("assuming that the loop is not infinite")
2062 : N_("cannot optimize possibly infinite loops");
2063 else
2064 wording =
2065 flag_unsafe_loop_optimizations
2066 ? N_("assuming that the loop counter does not overflow")
2067 : N_("cannot optimize loop, the loop counter may overflow");
2069 warning_at ((LOCATION_LINE (loc) > 0) ? loc : input_location,
2070 OPT_Wunsafe_loop_optimizations, "%s", gettext (wording));
2073 return flag_unsafe_loop_optimizations;
2076 /* Try to determine the number of iterations of LOOP. If we succeed,
2077 an expression giving the number of iterations is returned and *EXIT is
2078 set to the edge from which the information is obtained. Otherwise
2079 chrec_dont_know is returned. */
2081 tree
2082 find_loop_niter (struct loop *loop, edge *exit)
2084 unsigned i;
2085 vec<edge> exits = get_loop_exit_edges (loop);
2086 edge ex;
2087 tree niter = NULL_TREE, aniter;
2088 struct tree_niter_desc desc;
2090 *exit = NULL;
2091 FOR_EACH_VEC_ELT (exits, i, ex)
2093 if (!number_of_iterations_exit (loop, ex, &desc, false))
2094 continue;
2096 if (integer_nonzerop (desc.may_be_zero))
2098 /* We exit in the first iteration through this exit.
2099 We won't find anything better. */
2100 niter = build_int_cst (unsigned_type_node, 0);
2101 *exit = ex;
2102 break;
2105 if (!integer_zerop (desc.may_be_zero))
2106 continue;
2108 aniter = desc.niter;
2110 if (!niter)
2112 /* Nothing recorded yet. */
2113 niter = aniter;
2114 *exit = ex;
2115 continue;
2118 /* Prefer constants, the lower the better. */
2119 if (TREE_CODE (aniter) != INTEGER_CST)
2120 continue;
2122 if (TREE_CODE (niter) != INTEGER_CST)
2124 niter = aniter;
2125 *exit = ex;
2126 continue;
2129 if (tree_int_cst_lt (aniter, niter))
2131 niter = aniter;
2132 *exit = ex;
2133 continue;
2136 exits.release ();
2138 return niter ? niter : chrec_dont_know;
2141 /* Return true if loop is known to have bounded number of iterations. */
2143 bool
2144 finite_loop_p (struct loop *loop)
2146 widest_int nit;
2147 int flags;
2149 if (flag_unsafe_loop_optimizations)
2150 return true;
2151 flags = flags_from_decl_or_type (current_function_decl);
2152 if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE))
2154 if (dump_file && (dump_flags & TDF_DETAILS))
2155 fprintf (dump_file, "Found loop %i to be finite: it is within pure or const function.\n",
2156 loop->num);
2157 return true;
2160 if (loop->any_upper_bound
2161 || max_loop_iterations (loop, &nit))
2163 if (dump_file && (dump_flags & TDF_DETAILS))
2164 fprintf (dump_file, "Found loop %i to be finite: upper bound found.\n",
2165 loop->num);
2166 return true;
2168 return false;
2173 /* Analysis of the number of iterations of a loop by brute-force evaluation. */
2177 /* Bound on the number of iterations we try to evaluate. */
2179 #define MAX_ITERATIONS_TO_TRACK \
2180 ((unsigned) PARAM_VALUE (PARAM_MAX_ITERATIONS_TO_TRACK))
2182 /* Returns the loop phi node of LOOP such that ssa name X is derived from its
2183 result by a chain of operations in which all but exactly one of the
2184 operands are constants. */
2186 static gphi *
2187 chain_of_csts_start (struct loop *loop, tree x)
2189 gimple stmt = SSA_NAME_DEF_STMT (x);
2190 tree use;
2191 basic_block bb = gimple_bb (stmt);
2192 enum tree_code code;
2194 if (!bb
2195 || !flow_bb_inside_loop_p (loop, bb))
2196 return NULL;
2198 if (gimple_code (stmt) == GIMPLE_PHI)
2200 if (bb == loop->header)
2201 return as_a <gphi *> (stmt);
2203 return NULL;
2206 if (gimple_code (stmt) != GIMPLE_ASSIGN
2207 || gimple_assign_rhs_class (stmt) == GIMPLE_TERNARY_RHS)
2208 return NULL;
2210 code = gimple_assign_rhs_code (stmt);
2211 if (gimple_references_memory_p (stmt)
2212 || TREE_CODE_CLASS (code) == tcc_reference
2213 || (code == ADDR_EXPR
2214 && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
2215 return NULL;
2217 use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2218 if (use == NULL_TREE)
2219 return NULL;
2221 return chain_of_csts_start (loop, use);
2224 /* Determines whether the expression X is derived from a result of a phi node
2225 in the header of LOOP such that
2227 * the derivation of X consists only of operations with constants
2228 * the initial value of the phi node is constant
2229 * the value of the phi node in the next iteration can be derived from the
2230 value in the current iteration by a chain of operations with constants.
2232 If such phi node exists, it is returned, otherwise NULL is returned. */
2234 static gphi *
2235 get_base_for (struct loop *loop, tree x)
2237 gphi *phi;
2238 tree init, next;
2240 if (is_gimple_min_invariant (x))
2241 return NULL;
2243 phi = chain_of_csts_start (loop, x);
2244 if (!phi)
2245 return NULL;
2247 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2248 next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2250 if (TREE_CODE (next) != SSA_NAME)
2251 return NULL;
2253 if (!is_gimple_min_invariant (init))
2254 return NULL;
2256 if (chain_of_csts_start (loop, next) != phi)
2257 return NULL;
2259 return phi;
2262 /* Given an expression X, then
2264 * if X is NULL_TREE, we return the constant BASE.
2265 * otherwise X is an SSA name, whose value in the considered loop is derived
2266 by a chain of operations with constants from a result of a phi node in
2267 the header of the loop. Then we return the value of X when the value of
2268 the result of this phi node is given by the constant BASE. */
2270 static tree
2271 get_val_for (tree x, tree base)
2273 gimple stmt;
2275 gcc_checking_assert (is_gimple_min_invariant (base));
2277 if (!x)
2278 return base;
2280 stmt = SSA_NAME_DEF_STMT (x);
2281 if (gimple_code (stmt) == GIMPLE_PHI)
2282 return base;
2284 gcc_checking_assert (is_gimple_assign (stmt));
2286 /* STMT must be either an assignment of a single SSA name or an
2287 expression involving an SSA name and a constant. Try to fold that
2288 expression using the value for the SSA name. */
2289 if (gimple_assign_ssa_name_copy_p (stmt))
2290 return get_val_for (gimple_assign_rhs1 (stmt), base);
2291 else if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS
2292 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2294 return fold_build1 (gimple_assign_rhs_code (stmt),
2295 gimple_expr_type (stmt),
2296 get_val_for (gimple_assign_rhs1 (stmt), base));
2298 else if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
2300 tree rhs1 = gimple_assign_rhs1 (stmt);
2301 tree rhs2 = gimple_assign_rhs2 (stmt);
2302 if (TREE_CODE (rhs1) == SSA_NAME)
2303 rhs1 = get_val_for (rhs1, base);
2304 else if (TREE_CODE (rhs2) == SSA_NAME)
2305 rhs2 = get_val_for (rhs2, base);
2306 else
2307 gcc_unreachable ();
2308 return fold_build2 (gimple_assign_rhs_code (stmt),
2309 gimple_expr_type (stmt), rhs1, rhs2);
2311 else
2312 gcc_unreachable ();
2316 /* Tries to count the number of iterations of LOOP till it exits by EXIT
2317 by brute force -- i.e. by determining the values of the operands of the
2318 condition at EXIT in the first few iterations of the loop (assuming that
2319 these values are constant) and determining the first iteration in which the
2320 condition is not satisfied. Returns the constant giving the number
2321 of iterations of LOOP if successful, chrec_dont_know otherwise. */
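/* For illustration, a loop that this brute-force evaluation can handle
   (hypothetical example):

     i = 1;
     do
       i <<= 1;
     while (i != 64);

   Starting from the constant 1, the values 2, 4, 8, ... are computed
   one iteration at a time until the first iteration in which the exit
   condition holds.  */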
2323 tree
2324 loop_niter_by_eval (struct loop *loop, edge exit)
2326 tree acnd;
2327 tree op[2], val[2], next[2], aval[2];
2328 gphi *phi;
2329 gimple cond;
2330 unsigned i, j;
2331 enum tree_code cmp;
2333 cond = last_stmt (exit->src);
2334 if (!cond || gimple_code (cond) != GIMPLE_COND)
2335 return chrec_dont_know;
2337 cmp = gimple_cond_code (cond);
2338 if (exit->flags & EDGE_TRUE_VALUE)
2339 cmp = invert_tree_comparison (cmp, false);
2341 switch (cmp)
2343 case EQ_EXPR:
2344 case NE_EXPR:
2345 case GT_EXPR:
2346 case GE_EXPR:
2347 case LT_EXPR:
2348 case LE_EXPR:
2349 op[0] = gimple_cond_lhs (cond);
2350 op[1] = gimple_cond_rhs (cond);
2351 break;
2353 default:
2354 return chrec_dont_know;
2357 for (j = 0; j < 2; j++)
2359 if (is_gimple_min_invariant (op[j]))
2361 val[j] = op[j];
2362 next[j] = NULL_TREE;
2363 op[j] = NULL_TREE;
2365 else
2367 phi = get_base_for (loop, op[j]);
2368 if (!phi)
2369 return chrec_dont_know;
2370 val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2371 next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2375 /* Don't issue signed overflow warnings. */
2376 fold_defer_overflow_warnings ();
2378 for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
2380 for (j = 0; j < 2; j++)
2381 aval[j] = get_val_for (op[j], val[j]);
2383 acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
2384 if (acnd && integer_zerop (acnd))
2386 fold_undefer_and_ignore_overflow_warnings ();
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 fprintf (dump_file,
2389 "Proved that loop %d iterates %d times using brute force.\n",
2390 loop->num, i);
2391 return build_int_cst (unsigned_type_node, i);
2394 for (j = 0; j < 2; j++)
2396 val[j] = get_val_for (next[j], val[j]);
2397 if (!is_gimple_min_invariant (val[j]))
2399 fold_undefer_and_ignore_overflow_warnings ();
2400 return chrec_dont_know;
2405 fold_undefer_and_ignore_overflow_warnings ();
2407 return chrec_dont_know;
2410 /* Finds the exit of LOOP through which the loop exits after a constant
2411 number of iterations and stores the exit edge to *EXIT. The constant
2412 giving the number of iterations of LOOP is returned. The number of
2413 iterations is determined using loop_niter_by_eval (i.e. by brute force
2414 evaluation). If we are unable to find an exit for which loop_niter_by_eval
2415 determines the number of iterations, chrec_dont_know is returned. */
2417 tree
2418 find_loop_niter_by_eval (struct loop *loop, edge *exit)
2420 unsigned i;
2421 vec<edge> exits = get_loop_exit_edges (loop);
2422 edge ex;
2423 tree niter = NULL_TREE, aniter;
2425 *exit = NULL;
2427 /* Loops with multiple exits are expensive to handle and less important. */
2428 if (!flag_expensive_optimizations
2429 && exits.length () > 1)
2431 exits.release ();
2432 return chrec_dont_know;
2435 FOR_EACH_VEC_ELT (exits, i, ex)
2437 if (!just_once_each_iteration_p (loop, ex->src))
2438 continue;
2440 aniter = loop_niter_by_eval (loop, ex);
2441 if (chrec_contains_undetermined (aniter))
2442 continue;
2444 if (niter
2445 && !tree_int_cst_lt (aniter, niter))
2446 continue;
2448 niter = aniter;
2449 *exit = ex;
2451 exits.release ();
2453 return niter ? niter : chrec_dont_know;
2458 /* Analysis of upper bounds on the number of iterations of a loop. */
2462 static widest_int derive_constant_upper_bound_ops (tree, tree,
2463 enum tree_code, tree);
2465 /* Returns a constant upper bound on the value of the right-hand side of
2466 an assignment statement STMT. */
2468 static widest_int
2469 derive_constant_upper_bound_assign (gimple stmt)
2471 enum tree_code code = gimple_assign_rhs_code (stmt);
2472 tree op0 = gimple_assign_rhs1 (stmt);
2473 tree op1 = gimple_assign_rhs2 (stmt);
2475 return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)),
2476 op0, code, op1);
2479 /* Returns a constant upper bound on the value of expression VAL. VAL
2480 is considered to be unsigned. If its type is signed, its value must
2481 be nonnegative. */
2483 static widest_int
2484 derive_constant_upper_bound (tree val)
2486 enum tree_code code;
2487 tree op0, op1;
2489 extract_ops_from_tree (val, &code, &op0, &op1);
2490 return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
2493 /* Returns a constant upper bound on the value of expression OP0 CODE OP1,
2494 whose type is TYPE. The expression is considered to be unsigned. If
2495 its type is signed, its value must be nonnegative. */
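/* For illustration (hypothetical values): if X is nonnegative with
   derived upper bound B, then

     X & 15  is bounded by 15 (BIT_AND_EXPR case),
     X / 4   is bounded by B / 4 (FLOOR_DIV_EXPR case),
     X - 10  is bounded by B - 10 (MINUS_EXPR case, subject to the
             underflow checks below).

   When nothing better can be derived, the maximum value of TYPE is
   returned.  */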
2497 static widest_int
2498 derive_constant_upper_bound_ops (tree type, tree op0,
2499 enum tree_code code, tree op1)
2501 tree subtype, maxt;
2502 widest_int bnd, max, mmax, cst;
2503 gimple stmt;
2505 if (INTEGRAL_TYPE_P (type))
2506 maxt = TYPE_MAX_VALUE (type);
2507 else
2508 maxt = upper_bound_in_type (type, type);
2510 max = wi::to_widest (maxt);
2512 switch (code)
2514 case INTEGER_CST:
2515 return wi::to_widest (op0);
2517 CASE_CONVERT:
2518 subtype = TREE_TYPE (op0);
2519 if (!TYPE_UNSIGNED (subtype)
2520 /* If TYPE is also signed, the fact that VAL is nonnegative implies
2521 that OP0 is nonnegative. */
2522 && TYPE_UNSIGNED (type)
2523 && !tree_expr_nonnegative_p (op0))
2525 /* If we cannot prove that the casted expression is nonnegative,
2526 we cannot establish more useful upper bound than the precision
2527 of the type gives us. */
2528 return max;
2531 /* We now know that op0 is a nonnegative value. Try deriving an upper
2532 bound for it. */
2533 bnd = derive_constant_upper_bound (op0);
2535 /* If the bound does not fit in TYPE, max. value of TYPE could be
2536 attained. */
2537 if (wi::ltu_p (max, bnd))
2538 return max;
2540 return bnd;
2542 case PLUS_EXPR:
2543 case POINTER_PLUS_EXPR:
2544 case MINUS_EXPR:
2545 if (TREE_CODE (op1) != INTEGER_CST
2546 || !tree_expr_nonnegative_p (op0))
2547 return max;
2549 /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to
2550 choose the most logical way to treat this constant regardless
2551 of the signedness of the type. */
2552 cst = wi::sext (wi::to_widest (op1), TYPE_PRECISION (type));
2553 if (code != MINUS_EXPR)
2554 cst = -cst;
2556 bnd = derive_constant_upper_bound (op0);
2558 if (wi::neg_p (cst))
2560 cst = -cst;
2561 /* Avoid CST == 0x80000... */
2562 if (wi::neg_p (cst))
2563 return max;
2565 /* OP0 + CST. We need to check that
2566 BND <= MAX (type) - CST. */
2568 mmax -= cst;
2569 if (wi::ltu_p (mmax, bnd))
2570 return max;
2572 return bnd + cst;
2574 else
2576 /* OP0 - CST, where CST >= 0.
2578 If TYPE is signed, we have already verified that OP0 >= 0, and we
2579 know that the result is nonnegative. This implies that
2580 VAL <= BND - CST.
2582 If TYPE is unsigned, we must additionally know that OP0 >= CST,
2583 otherwise the operation underflows. */
2586 /* This should only happen if the type is unsigned; however, for
2587 buggy programs that use overflowing signed arithmetic even with
2588 -fno-wrapv, this condition may also be true for signed values. */
2589 if (wi::ltu_p (bnd, cst))
2590 return max;
2592 if (TYPE_UNSIGNED (type))
2594 tree tem = fold_binary (GE_EXPR, boolean_type_node, op0,
2595 wide_int_to_tree (type, cst));
2596 if (!tem || integer_nonzerop (tem))
2597 return max;
2600 bnd -= cst;
2603 return bnd;
2605 case FLOOR_DIV_EXPR:
2606 case EXACT_DIV_EXPR:
2607 if (TREE_CODE (op1) != INTEGER_CST
2608 || tree_int_cst_sign_bit (op1))
2609 return max;
2611 bnd = derive_constant_upper_bound (op0);
2612 return wi::udiv_floor (bnd, wi::to_widest (op1));
2614 case BIT_AND_EXPR:
2615 if (TREE_CODE (op1) != INTEGER_CST
2616 || tree_int_cst_sign_bit (op1))
2617 return max;
2618 return wi::to_widest (op1);
2620 case SSA_NAME:
2621 stmt = SSA_NAME_DEF_STMT (op0);
2622 if (gimple_code (stmt) != GIMPLE_ASSIGN
2623 || gimple_assign_lhs (stmt) != op0)
2624 return max;
2625 return derive_constant_upper_bound_assign (stmt);
2627 default:
2628 return max;
2632 /* Emit a -Waggressive-loop-optimizations warning if needed. */
2634 static void
2635 do_warn_aggressive_loop_optimizations (struct loop *loop,
2636 widest_int i_bound, gimple stmt)
2638 /* Don't warn if the loop doesn't have known constant bound. */
2639 if (!loop->nb_iterations
2640 || TREE_CODE (loop->nb_iterations) != INTEGER_CST
2641 || !warn_aggressive_loop_optimizations
2642 /* To avoid warning multiple times for the same loop,
2643 only start warning when we preserve loops. */
2644 || (cfun->curr_properties & PROP_loops) == 0
2645 /* Only warn once per loop. */
2646 || loop->warned_aggressive_loop_optimizations
2647 /* Only warn if undefined behavior gives us lower estimate than the
2648 known constant bound. */
2649 || wi::cmpu (i_bound, wi::to_widest (loop->nb_iterations)) >= 0
2650 /* And undefined behavior happens unconditionally. */
2651 || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (stmt)))
2652 return;
2654 edge e = single_exit (loop);
2655 if (e == NULL)
2656 return;
2658 gimple estmt = last_stmt (e->src);
2659 if (warning_at (gimple_location (stmt), OPT_Waggressive_loop_optimizations,
2660 "iteration %E invokes undefined behavior",
2661 wide_int_to_tree (TREE_TYPE (loop->nb_iterations),
2662 i_bound)))
2663 inform (gimple_location (estmt), "containing loop");
2664 loop->warned_aggressive_loop_optimizations = true;
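/* For illustration, a loop that triggers this warning (hypothetical
   example):

     int a[4];
     for (int i = 0; i < 10; i++)
       a[i] = i;

   The access a[i] is valid only for i in [0, 3], so undefined
   behavior yields a lower iteration bound (4) than the constant
   bound (9 latch executions) implied by the exit test, and iteration
   4 is diagnosed.  */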
2667 /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT
2668 is true if the loop is exited immediately after STMT, and this exit
2669 is taken at the latest when STMT has been executed BOUND + 1 times.
2670 REALISTIC is true if BOUND is expected to be close to the real number
2671 of iterations. UPPER is true if we are sure the loop iterates at most
2672 BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */
2674 static void
2675 record_estimate (struct loop *loop, tree bound, const widest_int &i_bound,
2676 gimple at_stmt, bool is_exit, bool realistic, bool upper)
2678 widest_int delta;
2680 if (dump_file && (dump_flags & TDF_DETAILS))
2682 fprintf (dump_file, "Statement %s", is_exit ? "(exit)" : "");
2683 print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
2684 fprintf (dump_file, " is %sexecuted at most ",
2685 upper ? "" : "probably ");
2686 print_generic_expr (dump_file, bound, TDF_SLIM);
2687 fprintf (dump_file, " (bounded by ");
2688 print_decu (i_bound, dump_file);
2689 fprintf (dump_file, ") + 1 times in loop %d.\n", loop->num);
2692 /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
2693 real number of iterations. */
2694 if (TREE_CODE (bound) != INTEGER_CST)
2695 realistic = false;
2696 else
2697 gcc_checking_assert (i_bound == wi::to_widest (bound));
2698 if (!upper && !realistic)
2699 return;
2701 /* If we have a guaranteed upper bound, record it in the appropriate
2702 list, unless this is an !is_exit bound (i.e. undefined behavior in
2703 at_stmt) in a loop with known constant number of iterations. */
2704 if (upper
2705 && (is_exit
2706 || loop->nb_iterations == NULL_TREE
2707 || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
2709 struct nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();
2711 elt->bound = i_bound;
2712 elt->stmt = at_stmt;
2713 elt->is_exit = is_exit;
2714 elt->next = loop->bounds;
2715 loop->bounds = elt;
2718 /* If the statement is executed on every path to the loop latch, we can
2719 directly infer the upper bound on the # of iterations of the loop. */
2720 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (at_stmt)))
2721 return;
2723 /* Update the number of iteration estimates according to the bound.
2724 If at_stmt is an exit then the loop latch is executed at most BOUND times,
2725 otherwise it can be executed BOUND + 1 times. We will lower the estimate
2726 later if such a statement must be executed on the last iteration. */
2727 if (is_exit)
2728 delta = 0;
2729 else
2730 delta = 1;
2731 widest_int new_i_bound = i_bound + delta;
2733 /* If an overflow occurred, ignore the result. */
2734 if (wi::ltu_p (new_i_bound, delta))
2735 return;
2737 if (upper && !is_exit)
2738 do_warn_aggressive_loop_optimizations (loop, new_i_bound, at_stmt);
2739 record_niter_bound (loop, new_i_bound, realistic, upper);
2742 /* Records the control iv analyzed in NITER for LOOP if the iv is valid
2743 and doesn't overflow. */
2745 static void
2746 record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
2748 struct control_iv *iv;
2750 if (!niter->control.base || !niter->control.step)
2751 return;
2753 if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
2754 return;
2756 iv = ggc_alloc<control_iv> ();
2757 iv->base = niter->control.base;
2758 iv->step = niter->control.step;
2759 iv->next = loop->control_ivs;
2760 loop->control_ivs = iv;
2762 return;
2765 /* Record the estimate on number of iterations of LOOP based on the fact that
2766 the induction variable BASE + STEP * i evaluated in STMT does not wrap and
2767 its values belong to the range <LOW, HIGH>. REALISTIC is true if the
2768 estimated number of iterations is expected to be close to the real one.
2769 UPPER is true if we are sure the induction variable does not wrap. */
2771 static void
2772 record_nonwrapping_iv (struct loop *loop, tree base, tree step, gimple stmt,
2773 tree low, tree high, bool realistic, bool upper)
2775 tree niter_bound, extreme, delta;
2776 tree type = TREE_TYPE (base), unsigned_type;
2777 tree orig_base = base;
2779 if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
2780 return;
2782 if (dump_file && (dump_flags & TDF_DETAILS))
2784 fprintf (dump_file, "Induction variable (");
2785 print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
2786 fprintf (dump_file, ") ");
2787 print_generic_expr (dump_file, base, TDF_SLIM);
2788 fprintf (dump_file, " + ");
2789 print_generic_expr (dump_file, step, TDF_SLIM);
2790 fprintf (dump_file, " * iteration does not wrap in statement ");
2791 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2792 fprintf (dump_file, " in loop %d.\n", loop->num);
2795 unsigned_type = unsigned_type_for (type);
2796 base = fold_convert (unsigned_type, base);
2797 step = fold_convert (unsigned_type, step);
2799 if (tree_int_cst_sign_bit (step))
2801 wide_int min, max;
2802 extreme = fold_convert (unsigned_type, low);
2803 if (TREE_CODE (orig_base) == SSA_NAME
2804 && TREE_CODE (high) == INTEGER_CST
2805 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
2806 && get_range_info (orig_base, &min, &max) == VR_RANGE
2807 && wi::gts_p (high, max))
2808 base = wide_int_to_tree (unsigned_type, max);
2809 else if (TREE_CODE (base) != INTEGER_CST)
2810 base = fold_convert (unsigned_type, high);
2811 delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
2812 step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
2814 else
2816 wide_int min, max;
2817 extreme = fold_convert (unsigned_type, high);
2818 if (TREE_CODE (orig_base) == SSA_NAME
2819 && TREE_CODE (low) == INTEGER_CST
2820 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
2821 && get_range_info (orig_base, &min, &max) == VR_RANGE
2822 && wi::gts_p (min, low))
2823 base = wide_int_to_tree (unsigned_type, min);
2824 else if (TREE_CODE (base) != INTEGER_CST)
2825 base = fold_convert (unsigned_type, low);
2826 delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
2829 /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
2830 would get out of the range. */
2831 niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
2832 widest_int max = derive_constant_upper_bound (niter_bound);
2833 record_estimate (loop, niter_bound, max, stmt, false, realistic, upper);
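/* For illustration (hypothetical values): an IV with base 0 and step 4
   that is known to stay within <0, 255> gives NITER_BOUND
   = (255 - 0) / 4 = 63, i.e. the statement is executed at most
   63 + 1 = 64 times.  */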
2836 /* Determine information about the number of iterations of LOOP from the
2837 index IDX of a data reference accessed in STMT. Callback for
2838 for_each_index. */
2841 struct ilb_data
2843 struct loop *loop;
2844 gimple stmt;
2847 static bool
2848 idx_infer_loop_bounds (tree base, tree *idx, void *dta)
2850 struct ilb_data *data = (struct ilb_data *) dta;
2851 tree ev, init, step;
2852 tree low, high, type, next;
2853 bool sign, upper = true, at_end = false;
2854 struct loop *loop = data->loop;
2855 bool reliable = true;
2857 if (TREE_CODE (base) != ARRAY_REF)
2858 return true;
2860 /* For arrays at the end of the structure, we are not guaranteed that they
2861 do not really extend over their declared size. However, for arrays of
2862 size greater than one, this is unlikely to be intended. */
2863 if (array_at_struct_end_p (base))
2865 at_end = true;
2866 upper = false;
2869 struct loop *dloop = loop_containing_stmt (data->stmt);
2870 if (!dloop)
2871 return true;
2873 ev = analyze_scalar_evolution (dloop, *idx);
2874 ev = instantiate_parameters (loop, ev);
2875 init = initial_condition (ev);
2876 step = evolution_part_in_loop_num (ev, loop->num);
2878 if (!init
2879 || !step
2880 || TREE_CODE (step) != INTEGER_CST
2881 || integer_zerop (step)
2882 || tree_contains_chrecs (init, NULL)
2883 || chrec_contains_symbols_defined_in_loop (init, loop->num))
2884 return true;
2886 low = array_ref_low_bound (base);
2887 high = array_ref_up_bound (base);
2889 /* The case of nonconstant bounds could be handled, but it would be
2890 complicated. */
2891 if (TREE_CODE (low) != INTEGER_CST
2892 || !high
2893 || TREE_CODE (high) != INTEGER_CST)
2894 return true;
2895 sign = tree_int_cst_sign_bit (step);
2896 type = TREE_TYPE (step);
2898 /* The array of length 1 at the end of a structure most likely extends
2899 beyond its bounds. */
2900 if (at_end
2901 && operand_equal_p (low, high, 0))
2902 return true;
2904 /* In case the relevant bound of the array does not fit in type, or
2905 it does, but bound + step (in type) still falls within the range of the
2906 array, the index may wrap and still stay within the range of the array
2907 (consider e.g. if the array is indexed by the full range of
2908 unsigned char).
2910 To make things simpler, we require both bounds to fit into type, although
2911 there are cases where this would not be strictly necessary. */
2912 if (!int_fits_type_p (high, type)
2913 || !int_fits_type_p (low, type))
2914 return true;
2915 low = fold_convert (type, low);
2916 high = fold_convert (type, high);
2918 if (sign)
2919 next = fold_binary (PLUS_EXPR, type, low, step);
2920 else
2921 next = fold_binary (PLUS_EXPR, type, high, step);
2923 if (tree_int_cst_compare (low, next) <= 0
2924 && tree_int_cst_compare (next, high) <= 0)
2925 return true;
2927 /* If the access is not executed on every iteration, we must ensure that
2928 overflow cannot make the access valid later. */
2929 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt))
2930 && scev_probably_wraps_p (initial_condition_in_loop_num (ev, loop->num),
2931 step, data->stmt, loop, true))
2932 reliable = false;
2934 record_nonwrapping_iv (loop, init, step, data->stmt, low, high, reliable, upper);
2935 return true;
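/* For illustration, a bound derived from an array index (hypothetical
   example):

     int a[100];
     for (i = 0; i != n; i++)
       a[i] = 0;

   The index of a[i] must stay within <0, 99>, so the store is executed
   at most 100 times, bounding the loop even though the exit test alone
   says nothing useful about N.  */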
2938 /* Determine information about the number of iterations of LOOP from the
2939 bounds of arrays in the data reference REF accessed in STMT. */
2942 static void
2943 infer_loop_bounds_from_ref (struct loop *loop, gimple stmt, tree ref)
2945 struct ilb_data data;
2947 data.loop = loop;
2948 data.stmt = stmt;
2949 for_each_index (&ref, idx_infer_loop_bounds, &data);
2952 /* Determine information about the number of iterations of LOOP from the way
2953 arrays are used in STMT. */
2956 static void
2957 infer_loop_bounds_from_array (struct loop *loop, gimple stmt)
2959 if (is_gimple_assign (stmt))
2961 tree op0 = gimple_assign_lhs (stmt);
2962 tree op1 = gimple_assign_rhs1 (stmt);
2964 /* For each memory access, analyze its access function
2965 and record a bound on the loop iteration domain. */
2966 if (REFERENCE_CLASS_P (op0))
2967 infer_loop_bounds_from_ref (loop, stmt, op0);
2969 if (REFERENCE_CLASS_P (op1))
2970 infer_loop_bounds_from_ref (loop, stmt, op1);
2972 else if (is_gimple_call (stmt))
2974 tree arg, lhs;
2975 unsigned i, n = gimple_call_num_args (stmt);
2977 lhs = gimple_call_lhs (stmt);
2978 if (lhs && REFERENCE_CLASS_P (lhs))
2979 infer_loop_bounds_from_ref (loop, stmt, lhs);
2981 for (i = 0; i < n; i++)
2983 arg = gimple_call_arg (stmt, i);
2984 if (REFERENCE_CLASS_P (arg))
2985 infer_loop_bounds_from_ref (loop, stmt, arg);
2990 /* Determine information about the number of iterations of LOOP from the fact
2991 that pointer arithmetic in STMT does not overflow. */
2993 static void
2994 infer_loop_bounds_from_pointer_arith (struct loop *loop, gimple stmt)
2996 tree def, base, step, scev, type, low, high;
2997 tree var, ptr;
2999 if (!is_gimple_assign (stmt)
3000 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
3001 return;
3003 def = gimple_assign_lhs (stmt);
3004 if (TREE_CODE (def) != SSA_NAME)
3005 return;
3007 type = TREE_TYPE (def);
3008 if (!nowrap_type_p (type))
3009 return;
3011 ptr = gimple_assign_rhs1 (stmt);
3012 if (!expr_invariant_in_loop_p (loop, ptr))
3013 return;
3015 var = gimple_assign_rhs2 (stmt);
3016 if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
3017 return;
3019 scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
3020 if (chrec_contains_undetermined (scev))
3021 return;
3023 base = initial_condition_in_loop_num (scev, loop->num);
3024 step = evolution_part_in_loop_num (scev, loop->num);
3026 if (!base || !step
3027 || TREE_CODE (step) != INTEGER_CST
3028 || tree_contains_chrecs (base, NULL)
3029 || chrec_contains_symbols_defined_in_loop (base, loop->num))
3030 return;
3032 low = lower_bound_in_type (type, type);
3033 high = upper_bound_in_type (type, type);
3035 /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
3036 produce a NULL pointer. The contrary would mean NULL points to an object,
3037 while NULL is supposed to compare unequal with the address of all objects.
3038 Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
3039 NULL pointer since that would mean wrapping, which we assume here not to
3040 happen. So, we can exclude NULL from the valid range of pointer
3041 arithmetic. */
3042 if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
3043 low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));
3045 record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
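/* For illustration (hypothetical example): in

     for (i = 0; i != n; i++)
       *(buf + i) = 0;

   the address computation "buf + i" may neither wrap around the
   address space nor produce a NULL pointer, which bounds the number
   of iterations even when nothing is known about N.  */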
3048 /* Determine information about the number of iterations of LOOP from the fact
3049 that signed arithmetic in STMT does not overflow. */
3051 static void
3052 infer_loop_bounds_from_signedness (struct loop *loop, gimple stmt)
3054 tree def, base, step, scev, type, low, high;
3056 if (gimple_code (stmt) != GIMPLE_ASSIGN)
3057 return;
3059 def = gimple_assign_lhs (stmt);
3061 if (TREE_CODE (def) != SSA_NAME)
3062 return;
3064 type = TREE_TYPE (def);
3065 if (!INTEGRAL_TYPE_P (type)
3066 || !TYPE_OVERFLOW_UNDEFINED (type))
3067 return;
3069 scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
3070 if (chrec_contains_undetermined (scev))
3071 return;
3073 base = initial_condition_in_loop_num (scev, loop->num);
3074 step = evolution_part_in_loop_num (scev, loop->num);
3076 if (!base || !step
3077 || TREE_CODE (step) != INTEGER_CST
3078 || tree_contains_chrecs (base, NULL)
3079 || chrec_contains_symbols_defined_in_loop (base, loop->num))
3080 return;
3082 low = lower_bound_in_type (type, type);
3083 high = upper_bound_in_type (type, type);
3085 record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
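/* For illustration (hypothetical example): in

     for (int i = 0; i != n; i += 4)
       f (i);

   signed overflow of "i + 4" is undefined, so the IV must stay within
   <INT_MIN, INT_MAX>, which yields an upper bound of roughly
   INT_MAX / 4 iterations even when the exit test cannot be
   analyzed.  */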
3088 /* The following analyzers extract information on the bounds of LOOP
3089 from the following undefined behaviors:
3091 - data references should not access elements over the statically
3092 allocated size,
3094 - signed variables should not overflow when flag_wrapv is not set. */
3097 static void
3098 infer_loop_bounds_from_undefined (struct loop *loop)
3100 unsigned i;
3101 basic_block *bbs;
3102 gimple_stmt_iterator bsi;
3103 basic_block bb;
3104 bool reliable;
3106 bbs = get_loop_body (loop);
3108 for (i = 0; i < loop->num_nodes; i++)
3110 bb = bbs[i];
3112 /* If BB is not executed in each iteration of the loop, we cannot
3113 use the operations in it to infer a reliable upper bound on the
3114 # of iterations of the loop. However, we can use it as a guess.
3115 Reliable guesses come only from array bounds. */
3116 reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb);
3118 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
3120 gimple stmt = gsi_stmt (bsi);
3122 infer_loop_bounds_from_array (loop, stmt);
3124 if (reliable)
3126 infer_loop_bounds_from_signedness (loop, stmt);
3127 infer_loop_bounds_from_pointer_arith (loop, stmt);
3133 free (bbs);
3136 /* Compare wide ints, callback for qsort. */
3138 static int
3139 wide_int_cmp (const void *p1, const void *p2)
3141 const widest_int *d1 = (const widest_int *) p1;
3142 const widest_int *d2 = (const widest_int *) p2;
3143 return wi::cmpu (*d1, *d2);
3146 /* Return index of BOUND in BOUNDS array sorted in increasing order.
3147 Lookup by binary search. */
3149 static int
3150 bound_index (vec<widest_int> bounds, const widest_int &bound)
3152 unsigned int end = bounds.length ();
3153 unsigned int begin = 0;
3155 /* Find a matching index by means of a binary search. */
3156 while (begin != end)
3158 unsigned int middle = (begin + end) / 2;
3159 widest_int index = bounds[middle];
3161 if (index == bound)
3162 return middle;
3163 else if (wi::ltu_p (index, bound))
3164 begin = middle + 1;
3165 else
3166 end = middle;
3168 gcc_unreachable ();
3171 /* We recorded loop bounds only for statements dominating loop latch (and thus
3172 executed each loop iteration). If there are any bounds on statements not
3173 dominating the loop latch we can improve the estimate by walking the loop
3174 body and seeing if every path from loop header to loop latch contains
3175 some bounded statement. */
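/* For illustration (hypothetical example): in

     for (i = 0; ; i++)
       if (flag)
         a[i] = 0;	/* a[100]: cannot execute past iteration 99  */
       else
         b[i] = 0;	/* b[50]: cannot execute past iteration 49  */

   neither store dominates the latch, so neither bound was usable
   directly; but every path from the header to the latch contains a
   statement that cannot execute past iteration 99, so the walk below
   still derives an upper bound for the loop.  */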
3177 static void
3178 discover_iteration_bound_by_body_walk (struct loop *loop)
3180 struct nb_iter_bound *elt;
3181 vec<widest_int> bounds = vNULL;
3182 vec<vec<basic_block> > queues = vNULL;
3183 vec<basic_block> queue = vNULL;
3184 ptrdiff_t queue_index;
3185 ptrdiff_t latch_index = 0;
3187 /* Discover what bounds may interest us. */
3188 for (elt = loop->bounds; elt; elt = elt->next)
3190 widest_int bound = elt->bound;
3192 /* An exit terminates the loop at the given iteration, while a non-exit
3193 bound produces an undefined effect on the next iteration. */
3194 if (!elt->is_exit)
3196 bound += 1;
3197 /* If an overflow occurred, ignore the result. */
3198 if (bound == 0)
3199 continue;
3202 if (!loop->any_upper_bound
3203 || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
3204 bounds.safe_push (bound);
3207 /* Exit early if there is nothing to do. */
3208 if (!bounds.exists ())
3209 return;
3211 if (dump_file && (dump_flags & TDF_DETAILS))
3212 fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
3214 /* Sort the bounds in decreasing order. */
3215 bounds.qsort (wide_int_cmp);
3217 /* For every basic block record the lowest bound that is guaranteed to
3218 terminate the loop. */
3220 hash_map<basic_block, ptrdiff_t> bb_bounds;
3221 for (elt = loop->bounds; elt; elt = elt->next)
3223 widest_int bound = elt->bound;
3224 if (!elt->is_exit)
3226 bound += 1;
3227 /* If an overflow occurred, ignore the result. */
3228 if (bound == 0)
3229 continue;
3232 if (!loop->any_upper_bound
3233 || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
3235 ptrdiff_t index = bound_index (bounds, bound);
3236 ptrdiff_t *entry = bb_bounds.get (gimple_bb (elt->stmt));
3237 if (!entry)
3238 bb_bounds.put (gimple_bb (elt->stmt), index);
3239 else if ((ptrdiff_t)*entry > index)
3240 *entry = index;
3244 hash_map<basic_block, ptrdiff_t> block_priority;
3246 /* Perform shortest path discovery loop->header ... loop->latch.
3248 The "distance" is given by the smallest loop bound of basic block
3249 present in the path and we look for path with largest smallest bound
3250 on it.
3252 To avoid the need for fibonacci heap on double ints we simply compress
3253 double ints into indexes to BOUNDS array and then represent the queue
3254 as arrays of queues for every index.
3255 Index of BOUNDS.length() means that the execution of given BB has
3256 no bounds determined.
3258 VISITED is a pointer map translating basic block into smallest index
3259 it was inserted into the priority queue with. */
3260 latch_index = -1;
3262 /* Start walk in loop header with index set to infinite bound. */
3263 queue_index = bounds.length ();
3264 queues.safe_grow_cleared (queue_index + 1);
3265 queue.safe_push (loop->header);
3266 queues[queue_index] = queue;
3267 block_priority.put (loop->header, queue_index);
3269 for (; queue_index >= 0; queue_index--)
3271 if (latch_index < queue_index)
3273 while (queues[queue_index].length ())
3275 basic_block bb;
3276 ptrdiff_t bound_index = queue_index;
3277 edge e;
3278 edge_iterator ei;
3280 queue = queues[queue_index];
3281 bb = queue.pop ();
3283 /* OK, we later inserted the BB with lower priority, skip it. */
3284 if (*block_priority.get (bb) > queue_index)
3285 continue;
3287 /* See if we can improve the bound. */
3288 ptrdiff_t *entry = bb_bounds.get (bb);
3289 if (entry && *entry < bound_index)
3290 bound_index = *entry;
3292 /* Insert successors into the queue, watch for the latch edge
3293 and record the greatest index we saw. */
3294 FOR_EACH_EDGE (e, ei, bb->succs)
3296 bool insert = false;
3298 if (loop_exit_edge_p (loop, e))
3299 continue;
3301 if (e == loop_latch_edge (loop)
3302 && latch_index < bound_index)
3303 latch_index = bound_index;
3304 else if (!(entry = block_priority.get (e->dest)))
3306 insert = true;
3307 block_priority.put (e->dest, bound_index);
3309 else if (*entry < bound_index)
3311 insert = true;
3312 *entry = bound_index;
3315 if (insert)
3316 queues[bound_index].safe_push (e->dest);
3320 queues[queue_index].release ();
3323 gcc_assert (latch_index >= 0);
3324 if ((unsigned)latch_index < bounds.length ())
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3328 fprintf (dump_file, "Found better loop bound ");
3329 print_decu (bounds[latch_index], dump_file);
3330 fprintf (dump_file, "\n");
3332 record_niter_bound (loop, bounds[latch_index], false, true);
3335 queues.release ();
3336 bounds.release ();
3339 /* See if every path across the loop goes through a statement that is known
3340 not to execute on the last iteration. In that case we can decrease the
3341 iteration count by 1. */
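/* For illustration (hypothetical example): in

     i = 0;
     do
       a[i++] = 0;	/* A has 4 elements.  */
     while (i < n);

   every path through the body executes the store before reaching the
   exit test, so an iteration that would overrun A cannot reach the
   exit either, and the iteration estimate can be decreased by 1.  */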
3343 static void
3344 maybe_lower_iteration_bound (struct loop *loop)
3346 hash_set<gimple> *not_executed_last_iteration = NULL;
3347 struct nb_iter_bound *elt;
3348 bool found_exit = false;
3349 vec<basic_block> queue = vNULL;
3350 bitmap visited;
3352 /* Collect all statements with interesting (i.e. lower than
3353 nb_iterations_upper_bound) bound on them.
3355 TODO: Due to the way record_estimate chooses estimates to store, the bounds
3356 will always be nb_iterations_upper_bound-1. We can change this to also
3357 record statements not dominating the loop latch and update the walk below
3358 to the shortest path algorithm. */
3359 for (elt = loop->bounds; elt; elt = elt->next)
3361 if (!elt->is_exit
3362 && wi::ltu_p (elt->bound, loop->nb_iterations_upper_bound))
3364 if (!not_executed_last_iteration)
3365 not_executed_last_iteration = new hash_set<gimple>;
3366 not_executed_last_iteration->add (elt->stmt);
3369 if (!not_executed_last_iteration)
3370 return;
3372 /* Start DFS walk in the loop header and see if we can reach the
3373 loop latch or any of the exits (including statements with side
3374 effects that may terminate the loop otherwise) without visiting
3375 any of the statements known to have undefined effect on the last
3376 iteration. */
3377 queue.safe_push (loop->header);
3378 visited = BITMAP_ALLOC (NULL);
3379 bitmap_set_bit (visited, loop->header->index);
3380 found_exit = false;
3384 basic_block bb = queue.pop ();
3385 gimple_stmt_iterator gsi;
3386 bool stmt_found = false;
3388 /* Loop for possible exits and statements bounding the execution. */
3389 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3391 gimple stmt = gsi_stmt (gsi);
3392 if (not_executed_last_iteration->contains (stmt))
3394 stmt_found = true;
3395 break;
3397 if (gimple_has_side_effects (stmt))
3399 found_exit = true;
3400 break;
3403 if (found_exit)
3404 break;
3406 /* If no bounding statement is found, continue the walk. */
3407 if (!stmt_found)
3409 edge e;
3410 edge_iterator ei;
3412 FOR_EACH_EDGE (e, ei, bb->succs)
3414 if (loop_exit_edge_p (loop, e)
3415 || e == loop_latch_edge (loop))
3417 found_exit = true;
3418 break;
3420 if (bitmap_set_bit (visited, e->dest->index))
3421 queue.safe_push (e->dest);
3425 while (queue.length () && !found_exit);
3427 /* If every path through the loop reaches a bounding statement before an
3428 exit, then we know the last iteration of the loop would invoke undefined
3429 behavior and we can decrease the number of iterations. */
3431 if (!found_exit)
3433 if (dump_file && (dump_flags & TDF_DETAILS))
3434 fprintf (dump_file, "Reducing loop iteration estimate by 1; "
3435 "undefined statement must be executed at the last iteration.\n");
3436 record_niter_bound (loop, loop->nb_iterations_upper_bound - 1,
3437 false, true);
3440 BITMAP_FREE (visited);
3441 queue.release ();
3442 delete not_executed_last_iteration;
3445 /* Records estimates on the number of iterations of LOOP. With
3446 -faggressive-loop-optimizations, also use estimates derived from undefined behavior. */
3448 static void
3449 estimate_numbers_of_iterations_loop (struct loop *loop)
3451 vec<edge> exits;
3452 tree niter, type;
3453 unsigned i;
3454 struct tree_niter_desc niter_desc;
3455 edge ex;
3456 widest_int bound;
3457 edge likely_exit;
3459 /* Give up if we already have tried to compute an estimation. */
3460 if (loop->estimate_state != EST_NOT_COMPUTED)
3461 return;
3463 loop->estimate_state = EST_AVAILABLE;
3464 /* Force estimate computation but leave any existing upper bound in place. */
3465 loop->any_estimate = false;
3467 /* Ensure that loop->nb_iterations is computed if possible. If it turns out
3468 to be constant, we avoid undefined behavior implied bounds and instead
3469 diagnose those loops with -Waggressive-loop-optimizations. */
3470 number_of_latch_executions (loop);
3472 exits = get_loop_exit_edges (loop);
3473 likely_exit = single_likely_exit (loop);
3474 FOR_EACH_VEC_ELT (exits, i, ex)
3476 if (!number_of_iterations_exit (loop, ex, &niter_desc, false, false))
3477 continue;
3479 niter = niter_desc.niter;
3480 type = TREE_TYPE (niter);
3481 if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
3482 niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
3483 build_int_cst (type, 0),
3484 niter);
3485 record_estimate (loop, niter, niter_desc.max,
3486 last_stmt (ex->src),
3487 true, ex == likely_exit, true);
3488 record_control_iv (loop, &niter_desc);
3490 exits.release ();
3492 if (flag_aggressive_loop_optimizations)
3493 infer_loop_bounds_from_undefined (loop);
3495 discover_iteration_bound_by_body_walk (loop);
3497 maybe_lower_iteration_bound (loop);
3499 /* If we have a measured profile, use it to estimate the number of
3500 iterations. */
3501 if (loop->header->count != 0)
3503 gcov_type nit = expected_loop_iterations_unbounded (loop) + 1;
3504 bound = gcov_type_to_wide_int (nit);
3505 record_niter_bound (loop, bound, true, false);
3508 /* If we know the exact number of iterations of this loop, try not to
3509 break code with undefined behavior by refusing to record a smaller
3510 maximum number of iterations. */
3511 if (loop->nb_iterations
3512 && TREE_CODE (loop->nb_iterations) == INTEGER_CST)
3514 loop->any_upper_bound = true;
3515 loop->nb_iterations_upper_bound = wi::to_widest (loop->nb_iterations);
3519 /* Sets NIT to the estimated number of executions of the latch of the
3520 LOOP. If we have no reliable estimate, the function returns false,
3521 otherwise returns true. */
3524 bool
3525 estimated_loop_iterations (struct loop *loop, widest_int *nit)
3527 /* When SCEV information is available, try to update loop iterations
3528 estimate. Otherwise just return whatever we recorded earlier. */
3529 if (scev_initialized_p ())
3530 estimate_numbers_of_iterations_loop (loop);
3532 return (get_estimated_loop_iterations (loop, nit));
3535 /* Similar to estimated_loop_iterations, but returns the estimate only
3536 if it fits into a HOST_WIDE_INT. If this is not the case, or the estimate
3537 on the number of iterations of LOOP could not be derived, returns -1. */
3539 HOST_WIDE_INT
3540 estimated_loop_iterations_int (struct loop *loop)
3542 widest_int nit;
3543 HOST_WIDE_INT hwi_nit;
3545 if (!estimated_loop_iterations (loop, &nit))
3546 return -1;
3548 if (!wi::fits_shwi_p (nit))
3549 return -1;
3550 hwi_nit = nit.to_shwi ();
3552 return hwi_nit < 0 ? -1 : hwi_nit;
3556 /* Sets NIT to an upper bound for the maximum number of executions of the
3557 latch of the LOOP. If we have no reliable estimate, the function returns
3558 false, otherwise returns true. */
3560 bool
3561 max_loop_iterations (struct loop *loop, widest_int *nit)
3563 /* When SCEV information is available, try to update loop iterations
3564 estimate. Otherwise just return whatever we recorded earlier. */
3565 if (scev_initialized_p ())
3566 estimate_numbers_of_iterations_loop (loop);
3568 return get_max_loop_iterations (loop, nit);
3571 /* Similar to max_loop_iterations, but returns the estimate only
3572 if it fits into a HOST_WIDE_INT. If this is not the case, or the estimate
3573 on the number of iterations of LOOP could not be derived, returns -1. */
3575 HOST_WIDE_INT
3576 max_loop_iterations_int (struct loop *loop)
3578 widest_int nit;
3579 HOST_WIDE_INT hwi_nit;
3581 if (!max_loop_iterations (loop, &nit))
3582 return -1;
3584 if (!wi::fits_shwi_p (nit))
3585 return -1;
3586 hwi_nit = nit.to_shwi ();
3588 return hwi_nit < 0 ? -1 : hwi_nit;
3591 /* Returns an estimate for the number of executions of statements
3592 in the LOOP. For statements before the loop exit, this exceeds
3593 the number of executions of the latch by one. */
3595 HOST_WIDE_INT
3596 estimated_stmt_executions_int (struct loop *loop)
3598 HOST_WIDE_INT nit = estimated_loop_iterations_int (loop);
3599 HOST_WIDE_INT snit;
3601 if (nit == -1)
3602 return -1;
3604 snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1);
3606 /* If the computation overflows, return -1. */
3607 return snit < 0 ? -1 : snit;
3610 /* Sets NIT to the estimated maximum number of executions of the latch of the
3611 LOOP, plus one. If we have no reliable estimate, the function returns
3612 false, otherwise returns true. */
3614 bool
3615 max_stmt_executions (struct loop *loop, widest_int *nit)
3617 widest_int nit_minus_one;
3619 if (!max_loop_iterations (loop, nit))
3620 return false;
3622 nit_minus_one = *nit;
3624 *nit += 1;
3626 return wi::gtu_p (*nit, nit_minus_one);
3629 /* Sets NIT to the estimated number of executions of the latch of the
3630 LOOP, plus one. If we have no reliable estimate, the function returns
3631 false, otherwise returns true. */
3633 bool
3634 estimated_stmt_executions (struct loop *loop, widest_int *nit)
3636 widest_int nit_minus_one;
3638 if (!estimated_loop_iterations (loop, nit))
3639 return false;
3641 nit_minus_one = *nit;
3643 *nit += 1;
3645 return wi::gtu_p (*nit, nit_minus_one);
3648 /* Records estimates on numbers of iterations of loops. */
3650 void
3651 estimate_numbers_of_iterations (void)
3653 struct loop *loop;
3655 /* We don't want to issue signed overflow warnings while getting
3656 loop iteration estimates. */
3657 fold_defer_overflow_warnings ();
3659 FOR_EACH_LOOP (loop, 0)
3661 estimate_numbers_of_iterations_loop (loop);
3664 fold_undefer_and_ignore_overflow_warnings ();
3667 /* Returns true if statement S1 dominates statement S2. */
3669 bool
3670 stmt_dominates_stmt_p (gimple s1, gimple s2)
3672 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
3674 if (!bb1
3675 || s1 == s2)
3676 return true;
3678 if (bb1 == bb2)
3680 gimple_stmt_iterator bsi;
3682 if (gimple_code (s2) == GIMPLE_PHI)
3683 return false;
3685 if (gimple_code (s1) == GIMPLE_PHI)
3686 return true;
3688 for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
3689 if (gsi_stmt (bsi) == s1)
3690 return true;
3692 return false;
3695 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
3698 /* Returns true when we can prove that the number of executions of
3699 STMT in the loop is at most NITER, according to the bound on
3700 the number of executions of the statement NITER_BOUND->stmt recorded in
3701 NITER_BOUND and the fact that NITER_BOUND->stmt dominates STMT.
3703 ??? This code can become quite a CPU hog - we can have many bounds,
3704 and large basic blocks force stmt_dominates_stmt_p to be queried
3705 many times, so the whole thing is O(n^2)
3706 per scev_probably_wraps_p invocation (which can happen n times).
3708 It would make more sense (and give better answers) to remember the BB
3709 bounds computed by discover_iteration_bound_by_body_walk. */
3711 static bool
3712 n_of_executions_at_most (gimple stmt,
3713 struct nb_iter_bound *niter_bound,
3714 tree niter)
3716 widest_int bound = niter_bound->bound;
3717 tree nit_type = TREE_TYPE (niter), e;
3718 enum tree_code cmp;
3720 gcc_assert (TYPE_UNSIGNED (nit_type));
3722 /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
3723 the number of iterations is small. */
3724 if (!wi::fits_to_tree_p (bound, nit_type))
3725 return false;
3727 /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
3728 times. This means that:
3730 -- if NITER_BOUND->is_exit is true, then everything after
3731 it is executed at most NITER_BOUND->bound times.
3733 -- if NITER_BOUND->is_exit is false, and if we can prove that when STMT
3734 is executed, NITER_BOUND->stmt is executed as well in the same
3735 iteration, then STMT is executed at most NITER_BOUND->bound + 1 times.
3737 If we can determine that NITER_BOUND->stmt is always executed
3738 after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
3739 We conclude that the latter holds if both statements belong to the same
3740 basic block, STMT comes before NITER_BOUND->stmt, and there are no
3741 statements with side effects in between. */
3743 if (niter_bound->is_exit)
3745 if (stmt == niter_bound->stmt
3746 || !stmt_dominates_stmt_p (niter_bound->stmt, stmt))
3747 return false;
3748 cmp = GE_EXPR;
3750 else
3752 if (!stmt_dominates_stmt_p (niter_bound->stmt, stmt))
3754 gimple_stmt_iterator bsi;
3755 if (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
3756 || gimple_code (stmt) == GIMPLE_PHI
3757 || gimple_code (niter_bound->stmt) == GIMPLE_PHI)
3758 return false;
3760 /* By stmt_dominates_stmt_p we already know that STMT appears
3761 before NITER_BOUND->STMT. We still need to test that the loop
3762 cannot be terminated by a side effect in between. */
3763 for (bsi = gsi_for_stmt (stmt); gsi_stmt (bsi) != niter_bound->stmt;
3764 gsi_next (&bsi))
3765 if (gimple_has_side_effects (gsi_stmt (bsi)))
3766 return false;
3767 bound += 1;
3768 if (bound == 0
3769 || !wi::fits_to_tree_p (bound, nit_type))
3770 return false;
3772 cmp = GT_EXPR;
3775 e = fold_binary (cmp, boolean_type_node,
3776 niter, wide_int_to_tree (nit_type, bound));
3777 return e && integer_nonzerop (e);
3780 /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
3782 bool
3783 nowrap_type_p (tree type)
3785 if (INTEGRAL_TYPE_P (type)
3786 && TYPE_OVERFLOW_UNDEFINED (type))
3787 return true;
3789 if (POINTER_TYPE_P (type))
3790 return true;
3792 return false;
3795 /* Return true if we can prove LOOP is exited before the evolution of the
3796 induction variable {BASE, STEP} overflows with respect to its type bound. */
3798 static bool
3799 loop_exits_before_overflow (tree base, tree step,
3800 gimple at_stmt, struct loop *loop)
3802 widest_int niter;
3803 struct control_iv *civ;
3804 struct nb_iter_bound *bound;
3805 tree e, delta, step_abs, unsigned_base;
3806 tree type = TREE_TYPE (step);
3807 tree unsigned_type, valid_niter;
3809 /* Don't issue signed overflow warnings. */
3810 fold_defer_overflow_warnings ();
3812 /* Compute the number of iterations before we reach the bound of the
3813 type, and verify that the loop is exited before this occurs. */
3814 unsigned_type = unsigned_type_for (type);
3815 unsigned_base = fold_convert (unsigned_type, base);
3817 if (tree_int_cst_sign_bit (step))
3819 tree extreme = fold_convert (unsigned_type,
3820 lower_bound_in_type (type, type));
3821 delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
3822 step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
3823 fold_convert (unsigned_type, step));
3825 else
3827 tree extreme = fold_convert (unsigned_type,
3828 upper_bound_in_type (type, type));
3829 delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
3830 step_abs = fold_convert (unsigned_type, step);
3833 valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);

  estimate_numbers_of_iterations_loop (loop);

  if (max_loop_iterations (loop, &niter)
      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
			   wide_int_to_tree (TREE_TYPE (valid_niter),
					     niter))) != NULL
      && integer_nonzerop (e))
    {
      fold_undefer_and_ignore_overflow_warnings ();
      return true;
    }
  if (at_stmt)
    for (bound = loop->bounds; bound; bound = bound->next)
      {
	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
	  {
	    fold_undefer_and_ignore_overflow_warnings ();
	    return true;
	  }
      }
  fold_undefer_and_ignore_overflow_warnings ();

  /* Try to prove the loop is exited before {base, step} overflows with the
     help of an analyzed loop control IV.  This is done only for IVs with
     constant step because otherwise we don't have the information.  */
  if (TREE_CODE (step) == INTEGER_CST)
    for (civ = loop->control_ivs; civ; civ = civ->next)
      {
	enum tree_code code;
	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);

	/* Have to consider type difference because operand_equal_p ignores
	   that for constants.  */
	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
	    || element_precision (type) != element_precision (civ_type))
	  continue;

	/* Only consider control IVs with the same step.  */
	if (!operand_equal_p (step, civ->step, 0))
	  continue;

	/* Done proving if this is a no-overflow control IV.  */
	if (operand_equal_p (base, civ->base, 0))
	  return true;

	/* If this is a before-stepping control IV, in other words, we have

	     {civ_base, step} = {base + step, step}.

	   Because civ {base + step, step} doesn't overflow during loop
	   iterations, {base, step} will not overflow if we can prove that
	   the operation "base + step" does not overflow.  Specifically, we
	   try to prove that the conditions below are satisfied:

	     base <= UPPER_BOUND (type) - step  ;; step > 0
	     base >= LOWER_BOUND (type) - step  ;; step < 0

	   by proving that the reverse conditions are false using the loop's
	   initial condition.  */
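	/* A concrete instance (for exposition): with TYPE == signed char
	   and STEP == 1, EXTREME below becomes 127 - 1 == 126 and we try
	   to disprove "base > 126" from the loop's initial condition; if
	   that comparison simplifies to false, "base + 1" cannot
	   overflow.  */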
	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
	if (operand_equal_p (stepped, civ->base, 0))
	  {
	    if (tree_int_cst_sign_bit (step))
	      {
		code = LT_EXPR;
		extreme = lower_bound_in_type (type, type);
	      }
	    else
	      {
		code = GT_EXPR;
		extreme = upper_bound_in_type (type, type);
	      }
	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
	    e = fold_build2 (code, boolean_type_node, base, extreme);
	    e = simplify_using_initial_conditions (loop, e);
	    if (integer_zerop (e))
	      return true;

	    continue;
	  }

	/* Similar to the above, only in this case we have:

	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
	     && TREE_TYPE (civ_base) = signed T.

	   We prove that the condition below is satisfied:

	     (signed T)((unsigned T)base + step)
	       == (signed T)(unsigned T)base + step
	       == base + step

	   for exactly the same reason as above.  This also proves that
	   there is no overflow in the operation "base + step", and thus
	   none in the induction variable {base, step} during loop
	   iterations.

	   This is necessary to handle cases like the one below:

	     int foo (int *a, signed char s, signed char l)
	     {
	       signed char i;
	       for (i = s; i < l; i++)
		 a[i] = 0;
	       return 0;
	     }

	   The variable i is first converted to type unsigned char,
	   incremented, then converted back to type signed char.  */
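	/* In GIMPLE form the increment of i in the example above looks
	   roughly like this (a sketch; the SSA names are invented):

	     i.0_1 = (unsigned char) i_2;
	     i.1_3 = i.0_1 + 1;
	     i_4 = (signed char) i.1_3;

	   which is the pattern the checks below recognize.  */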
	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
	  continue;
	e = TREE_OPERAND (civ->base, 0);
	if (TREE_CODE (e) != PLUS_EXPR
	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
	    || !operand_equal_p (step,
				 fold_convert (type,
					       TREE_OPERAND (e, 1)), 0))
	  continue;
	e = TREE_OPERAND (e, 0);
	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
	  continue;
	e = TREE_OPERAND (e, 0);
	gcc_assert (operand_equal_p (e, base, 0));
	if (tree_int_cst_sign_bit (step))
	  {
	    code = LT_EXPR;
	    extreme = lower_bound_in_type (type, type);
	  }
	else
	  {
	    code = GT_EXPR;
	    extreme = upper_bound_in_type (type, type);
	  }
	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
	e = fold_build2 (code, boolean_type_node, base, extreme);
	e = simplify_using_initial_conditions (loop, e);
	if (integer_zerop (e))
	  return true;
      }

  return false;
}

/* Return false only when the induction variable BASE + STEP * I is
   known to not overflow: i.e. when the number of iterations is small
   enough with respect to the step and initial condition in order to
   keep the evolution confined in TYPE's bounds.  Return true when the
   iv is known to overflow or when the property is not computable.

   USE_OVERFLOW_SEMANTICS is true if this function should assume that
   the rules for overflow of the given language apply (e.g., that signed
   arithmetic in C does not overflow).  */

bool
scev_probably_wraps_p (tree base, tree step,
		       gimple at_stmt, struct loop *loop,
		       bool use_overflow_semantics)
{
  /* FIXME: We really need something like
     http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.

     We used to test for the following situation that frequently appears
     during address arithmetic:

       D.1621_13 = (long unsigned intD.4) D.1620_12;
       D.1622_14 = D.1621_13 * 8;
       D.1623_15 = (doubleD.29 *) D.1622_14;

     And derived that the sequence corresponding to D_14
     can be proved to not wrap because it is used for computing a
     memory access; however, this is not really the case -- for example,
     if D_12 = (unsigned char) [254,+,1], then D_14 has values
     2032, 2040, 0, 8, ..., but the code is still legal.  */

  if (chrec_contains_undetermined (base)
      || chrec_contains_undetermined (step))
    return true;

  if (integer_zerop (step))
    return false;

  /* If we can use the fact that signed and pointer arithmetic does not
     wrap, we are done.  */
  if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base)))
    return false;

  /* To be able to use estimates on number of iterations of the loop,
     we must have an upper bound on the absolute value of the step.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return true;

  if (loop_exits_before_overflow (base, step, at_stmt, loop))
    return false;

  /* At this point we still don't have a proof that the iv does not
     overflow: give up.  */
  return true;
}
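
/* Typical use (a sketch; convert_affine_scev in tree-chrec.c is one
   caller of this kind): before widening or converting an IV {base, step},
   a pass asks

     if (!scev_probably_wraps_p (base, step, at_stmt, loop,
				 use_overflow_semantics))
       // Safe: the evolution provably stays within its type's bounds.
       ...

   and must stay conservative whenever true is returned, since true means
   "may wrap or unknown", not "definitely wraps".  */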

/* Frees the information on upper bounds on numbers of iterations of LOOP.  */

void
free_numbers_of_iterations_estimates_loop (struct loop *loop)
{
  struct control_iv *civ;
  struct nb_iter_bound *bound;

  loop->nb_iterations = NULL;
  loop->estimate_state = EST_NOT_COMPUTED;
  for (bound = loop->bounds; bound;)
    {
      struct nb_iter_bound *next = bound->next;
      ggc_free (bound);
      bound = next;
    }
  loop->bounds = NULL;

  for (civ = loop->control_ivs; civ;)
    {
      struct control_iv *next = civ->next;
      ggc_free (civ);
      civ = next;
    }
  loop->control_ivs = NULL;
}

/* Frees the information on upper bounds on numbers of iterations of loops.  */

void
free_numbers_of_iterations_estimates (void)
{
  struct loop *loop;

  FOR_EACH_LOOP (loop, 0)
    {
      free_numbers_of_iterations_estimates_loop (loop);
    }
}

/* Substitute value VAL for ssa name NAME inside expressions held
   at LOOP.  */

void
substitute_in_loop_info (struct loop *loop, tree name, tree val)
{
  loop->nb_iterations = simplify_replace_tree (loop->nb_iterations, name, val);
}