PR c/68024
[official-gcc.git] / gcc / tree-ssa-loop-niter.c
blob5125af4d7bdaa8b36d8902c6b9f61aca51e57f81
1 /* Functions to determine/estimate number of iterations of a loop.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "rtl.h"
27 #include "ssa.h"
28 #include "alias.h"
29 #include "stor-layout.h"
30 #include "fold-const.h"
31 #include "calls.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expmed.h"
35 #include "dojump.h"
36 #include "explow.h"
37 #include "emit-rtl.h"
38 #include "varasm.h"
39 #include "stmt.h"
40 #include "expr.h"
41 #include "tm_p.h"
42 #include "gimple-pretty-print.h"
43 #include "intl.h"
44 #include "internal-fn.h"
45 #include "gimplify.h"
46 #include "gimple-iterator.h"
47 #include "tree-cfg.h"
48 #include "tree-ssa-loop-ivopts.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "dumpfile.h"
52 #include "cfgloop.h"
53 #include "tree-chrec.h"
54 #include "tree-scalar-evolution.h"
55 #include "tree-data-ref.h"
56 #include "params.h"
57 #include "diagnostic-core.h"
58 #include "tree-inline.h"
59 #include "tree-pass.h"
60 #include "wide-int-print.h"
63 /* The maximum number of dominator BBs we search for conditions
64 of loop header copies we use for simplifying a conditional
65 expression. */
66 #define MAX_DOMINATORS_TO_WALK 8
70 Analysis of number of iterations of an affine exit test.
74 /* Bounds on some value, BELOW <= X <= UP. */
76 struct bounds
78 mpz_t below, up;
82 /* Splits expression EXPR to a variable part VAR and constant OFFSET. */
84 static void
85 split_to_var_and_offset (tree expr, tree *var, mpz_t offset)
87 tree type = TREE_TYPE (expr);
88 tree op0, op1;
89 bool negate = false;
91 *var = expr;
92 mpz_set_ui (offset, 0);
94 switch (TREE_CODE (expr))
96 case MINUS_EXPR:
97 negate = true;
98 /* Fallthru. */
100 case PLUS_EXPR:
101 case POINTER_PLUS_EXPR:
102 op0 = TREE_OPERAND (expr, 0);
103 op1 = TREE_OPERAND (expr, 1);
105 if (TREE_CODE (op1) != INTEGER_CST)
106 break;
108 *var = op0;
109 /* Always sign extend the offset. */
110 wi::to_mpz (op1, offset, SIGNED);
111 if (negate)
112 mpz_neg (offset, offset);
113 break;
115 case INTEGER_CST:
116 *var = build_int_cst_type (type, 0);
117 wi::to_mpz (expr, offset, TYPE_SIGN (type));
118 break;
120 default:
121 break;
125 /* From condition C0 CMP C1 derives information regarding the value range
126 of VAR, which is of TYPE. Results are stored in to BELOW and UP. */
128 static void
129 refine_value_range_using_guard (tree type, tree var,
130 tree c0, enum tree_code cmp, tree c1,
131 mpz_t below, mpz_t up)
133 tree varc0, varc1, ctype;
134 mpz_t offc0, offc1;
135 mpz_t mint, maxt, minc1, maxc1;
136 wide_int minv, maxv;
137 bool no_wrap = nowrap_type_p (type);
138 bool c0_ok, c1_ok;
139 signop sgn = TYPE_SIGN (type);
141 switch (cmp)
143 case LT_EXPR:
144 case LE_EXPR:
145 case GT_EXPR:
146 case GE_EXPR:
147 STRIP_SIGN_NOPS (c0);
148 STRIP_SIGN_NOPS (c1);
149 ctype = TREE_TYPE (c0);
150 if (!useless_type_conversion_p (ctype, type))
151 return;
153 break;
155 case EQ_EXPR:
156 /* We could derive quite precise information from EQ_EXPR, however,
157 such a guard is unlikely to appear, so we do not bother with
158 handling it. */
159 return;
161 case NE_EXPR:
162 /* NE_EXPR comparisons do not contain much of useful information,
163 except for cases of comparing with bounds. */
164 if (TREE_CODE (c1) != INTEGER_CST
165 || !INTEGRAL_TYPE_P (type))
166 return;
168 /* Ensure that the condition speaks about an expression in the same
169 type as X and Y. */
170 ctype = TREE_TYPE (c0);
171 if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
172 return;
173 c0 = fold_convert (type, c0);
174 c1 = fold_convert (type, c1);
176 if (operand_equal_p (var, c0, 0))
178 mpz_t valc1;
180 /* Case of comparing VAR with its below/up bounds. */
181 mpz_init (valc1);
182 wi::to_mpz (c1, valc1, TYPE_SIGN (type));
183 if (mpz_cmp (valc1, below) == 0)
184 cmp = GT_EXPR;
185 if (mpz_cmp (valc1, up) == 0)
186 cmp = LT_EXPR;
188 mpz_clear (valc1);
190 else
192 /* Case of comparing with the bounds of the type. */
193 wide_int min = wi::min_value (type);
194 wide_int max = wi::max_value (type);
196 if (wi::eq_p (c1, min))
197 cmp = GT_EXPR;
198 if (wi::eq_p (c1, max))
199 cmp = LT_EXPR;
202 /* Quick return if no useful information. */
203 if (cmp == NE_EXPR)
204 return;
206 break;
208 default:
209 return;
212 mpz_init (offc0);
213 mpz_init (offc1);
214 split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
215 split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
217 /* We are only interested in comparisons of expressions based on VAR. */
218 if (operand_equal_p (var, varc1, 0))
220 std::swap (varc0, varc1);
221 mpz_swap (offc0, offc1);
222 cmp = swap_tree_comparison (cmp);
224 else if (!operand_equal_p (var, varc0, 0))
226 mpz_clear (offc0);
227 mpz_clear (offc1);
228 return;
231 mpz_init (mint);
232 mpz_init (maxt);
233 get_type_static_bounds (type, mint, maxt);
234 mpz_init (minc1);
235 mpz_init (maxc1);
236 /* Setup range information for varc1. */
237 if (integer_zerop (varc1))
239 wi::to_mpz (integer_zero_node, minc1, TYPE_SIGN (type));
240 wi::to_mpz (integer_zero_node, maxc1, TYPE_SIGN (type));
242 else if (TREE_CODE (varc1) == SSA_NAME
243 && INTEGRAL_TYPE_P (type)
244 && get_range_info (varc1, &minv, &maxv) == VR_RANGE)
246 gcc_assert (wi::le_p (minv, maxv, sgn));
247 wi::to_mpz (minv, minc1, sgn);
248 wi::to_mpz (maxv, maxc1, sgn);
250 else
252 mpz_set (minc1, mint);
253 mpz_set (maxc1, maxt);
256 /* Compute valid range information for varc1 + offc1. Note nothing
257 useful can be derived if it overflows or underflows. Overflow or
258 underflow could happen when:
260 offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
261 offc1 < 0 && varc1 + offc1 < MIN_VAL (type). */
262 mpz_add (minc1, minc1, offc1);
263 mpz_add (maxc1, maxc1, offc1);
264 c1_ok = (no_wrap
265 || mpz_sgn (offc1) == 0
266 || (mpz_sgn (offc1) < 0 && mpz_cmp (minc1, mint) >= 0)
267 || (mpz_sgn (offc1) > 0 && mpz_cmp (maxc1, maxt) <= 0));
268 if (!c1_ok)
269 goto end;
271 if (mpz_cmp (minc1, mint) < 0)
272 mpz_set (minc1, mint);
273 if (mpz_cmp (maxc1, maxt) > 0)
274 mpz_set (maxc1, maxt);
276 if (cmp == LT_EXPR)
278 cmp = LE_EXPR;
279 mpz_sub_ui (maxc1, maxc1, 1);
281 if (cmp == GT_EXPR)
283 cmp = GE_EXPR;
284 mpz_add_ui (minc1, minc1, 1);
287 /* Compute range information for varc0. If there is no overflow,
288 the condition implied that
290 (varc0) cmp (varc1 + offc1 - offc0)
292 We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
293 or the below bound if cmp is GE_EXPR.
295 To prove there is no overflow/underflow, we need to check below
296 four cases:
297 1) cmp == LE_EXPR && offc0 > 0
299 (varc0 + offc0) doesn't overflow
300 && (varc1 + offc1 - offc0) doesn't underflow
302 2) cmp == LE_EXPR && offc0 < 0
304 (varc0 + offc0) doesn't underflow
305 && (varc1 + offc1 - offc0) doesn't overfloe
307 In this case, (varc0 + offc0) will never underflow if we can
308 prove (varc1 + offc1 - offc0) doesn't overflow.
310 3) cmp == GE_EXPR && offc0 < 0
312 (varc0 + offc0) doesn't underflow
313 && (varc1 + offc1 - offc0) doesn't overflow
315 4) cmp == GE_EXPR && offc0 > 0
317 (varc0 + offc0) doesn't overflow
318 && (varc1 + offc1 - offc0) doesn't underflow
320 In this case, (varc0 + offc0) will never overflow if we can
321 prove (varc1 + offc1 - offc0) doesn't underflow.
323 Note we only handle case 2 and 4 in below code. */
325 mpz_sub (minc1, minc1, offc0);
326 mpz_sub (maxc1, maxc1, offc0);
327 c0_ok = (no_wrap
328 || mpz_sgn (offc0) == 0
329 || (cmp == LE_EXPR
330 && mpz_sgn (offc0) < 0 && mpz_cmp (maxc1, maxt) <= 0)
331 || (cmp == GE_EXPR
332 && mpz_sgn (offc0) > 0 && mpz_cmp (minc1, mint) >= 0));
333 if (!c0_ok)
334 goto end;
336 if (cmp == LE_EXPR)
338 if (mpz_cmp (up, maxc1) > 0)
339 mpz_set (up, maxc1);
341 else
343 if (mpz_cmp (below, minc1) < 0)
344 mpz_set (below, minc1);
347 end:
348 mpz_clear (mint);
349 mpz_clear (maxt);
350 mpz_clear (minc1);
351 mpz_clear (maxc1);
352 mpz_clear (offc0);
353 mpz_clear (offc1);
356 /* Stores estimate on the minimum/maximum value of the expression VAR + OFF
357 in TYPE to MIN and MAX. */
359 static void
360 determine_value_range (struct loop *loop, tree type, tree var, mpz_t off,
361 mpz_t min, mpz_t max)
363 int cnt = 0;
364 mpz_t minm, maxm;
365 basic_block bb;
366 wide_int minv, maxv;
367 enum value_range_type rtype = VR_VARYING;
369 /* If the expression is a constant, we know its value exactly. */
370 if (integer_zerop (var))
372 mpz_set (min, off);
373 mpz_set (max, off);
374 return;
377 get_type_static_bounds (type, min, max);
379 /* See if we have some range info from VRP. */
380 if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
382 edge e = loop_preheader_edge (loop);
383 signop sgn = TYPE_SIGN (type);
384 gphi_iterator gsi;
386 /* Either for VAR itself... */
387 rtype = get_range_info (var, &minv, &maxv);
388 /* Or for PHI results in loop->header where VAR is used as
389 PHI argument from the loop preheader edge. */
390 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
392 gphi *phi = gsi.phi ();
393 wide_int minc, maxc;
394 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
395 && (get_range_info (gimple_phi_result (phi), &minc, &maxc)
396 == VR_RANGE))
398 if (rtype != VR_RANGE)
400 rtype = VR_RANGE;
401 minv = minc;
402 maxv = maxc;
404 else
406 minv = wi::max (minv, minc, sgn);
407 maxv = wi::min (maxv, maxc, sgn);
408 /* If the PHI result range are inconsistent with
409 the VAR range, give up on looking at the PHI
410 results. This can happen if VR_UNDEFINED is
411 involved. */
412 if (wi::gt_p (minv, maxv, sgn))
414 rtype = get_range_info (var, &minv, &maxv);
415 break;
420 mpz_init (minm);
421 mpz_init (maxm);
422 if (rtype != VR_RANGE)
424 mpz_set (minm, min);
425 mpz_set (maxm, max);
427 else
429 gcc_assert (wi::le_p (minv, maxv, sgn));
430 wi::to_mpz (minv, minm, sgn);
431 wi::to_mpz (maxv, maxm, sgn);
433 /* Now walk the dominators of the loop header and use the entry
434 guards to refine the estimates. */
435 for (bb = loop->header;
436 bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
437 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
439 edge e;
440 tree c0, c1;
441 gimple *cond;
442 enum tree_code cmp;
444 if (!single_pred_p (bb))
445 continue;
446 e = single_pred_edge (bb);
448 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
449 continue;
451 cond = last_stmt (e->src);
452 c0 = gimple_cond_lhs (cond);
453 cmp = gimple_cond_code (cond);
454 c1 = gimple_cond_rhs (cond);
456 if (e->flags & EDGE_FALSE_VALUE)
457 cmp = invert_tree_comparison (cmp, false);
459 refine_value_range_using_guard (type, var, c0, cmp, c1, minm, maxm);
460 ++cnt;
463 mpz_add (minm, minm, off);
464 mpz_add (maxm, maxm, off);
465 /* If the computation may not wrap or off is zero, then this
466 is always fine. If off is negative and minv + off isn't
467 smaller than type's minimum, or off is positive and
468 maxv + off isn't bigger than type's maximum, use the more
469 precise range too. */
470 if (nowrap_type_p (type)
471 || mpz_sgn (off) == 0
472 || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
473 || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
475 mpz_set (min, minm);
476 mpz_set (max, maxm);
477 mpz_clear (minm);
478 mpz_clear (maxm);
479 return;
481 mpz_clear (minm);
482 mpz_clear (maxm);
485 /* If the computation may wrap, we know nothing about the value, except for
486 the range of the type. */
487 if (!nowrap_type_p (type))
488 return;
490 /* Since the addition of OFF does not wrap, if OFF is positive, then we may
491 add it to MIN, otherwise to MAX. */
492 if (mpz_sgn (off) < 0)
493 mpz_add (max, max, off);
494 else
495 mpz_add (min, min, off);
498 /* Stores the bounds on the difference of the values of the expressions
499 (var + X) and (var + Y), computed in TYPE, to BNDS. */
501 static void
502 bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y,
503 bounds *bnds)
505 int rel = mpz_cmp (x, y);
506 bool may_wrap = !nowrap_type_p (type);
507 mpz_t m;
509 /* If X == Y, then the expressions are always equal.
510 If X > Y, there are the following possibilities:
511 a) neither of var + X and var + Y overflow or underflow, or both of
512 them do. Then their difference is X - Y.
513 b) var + X overflows, and var + Y does not. Then the values of the
514 expressions are var + X - M and var + Y, where M is the range of
515 the type, and their difference is X - Y - M.
516 c) var + Y underflows and var + X does not. Their difference again
517 is M - X + Y.
518 Therefore, if the arithmetics in type does not overflow, then the
519 bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y)
520 Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
521 (X - Y, X - Y + M). */
523 if (rel == 0)
525 mpz_set_ui (bnds->below, 0);
526 mpz_set_ui (bnds->up, 0);
527 return;
530 mpz_init (m);
531 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED);
532 mpz_add_ui (m, m, 1);
533 mpz_sub (bnds->up, x, y);
534 mpz_set (bnds->below, bnds->up);
536 if (may_wrap)
538 if (rel > 0)
539 mpz_sub (bnds->below, bnds->below, m);
540 else
541 mpz_add (bnds->up, bnds->up, m);
544 mpz_clear (m);
547 /* From condition C0 CMP C1 derives information regarding the
548 difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
549 and stores it to BNDS. */
551 static void
552 refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
553 tree vary, mpz_t offy,
554 tree c0, enum tree_code cmp, tree c1,
555 bounds *bnds)
557 tree varc0, varc1, ctype;
558 mpz_t offc0, offc1, loffx, loffy, bnd;
559 bool lbound = false;
560 bool no_wrap = nowrap_type_p (type);
561 bool x_ok, y_ok;
563 switch (cmp)
565 case LT_EXPR:
566 case LE_EXPR:
567 case GT_EXPR:
568 case GE_EXPR:
569 STRIP_SIGN_NOPS (c0);
570 STRIP_SIGN_NOPS (c1);
571 ctype = TREE_TYPE (c0);
572 if (!useless_type_conversion_p (ctype, type))
573 return;
575 break;
577 case EQ_EXPR:
578 /* We could derive quite precise information from EQ_EXPR, however, such
579 a guard is unlikely to appear, so we do not bother with handling
580 it. */
581 return;
583 case NE_EXPR:
584 /* NE_EXPR comparisons do not contain much of useful information, except for
585 special case of comparing with the bounds of the type. */
586 if (TREE_CODE (c1) != INTEGER_CST
587 || !INTEGRAL_TYPE_P (type))
588 return;
590 /* Ensure that the condition speaks about an expression in the same type
591 as X and Y. */
592 ctype = TREE_TYPE (c0);
593 if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
594 return;
595 c0 = fold_convert (type, c0);
596 c1 = fold_convert (type, c1);
598 if (TYPE_MIN_VALUE (type)
599 && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
601 cmp = GT_EXPR;
602 break;
604 if (TYPE_MAX_VALUE (type)
605 && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
607 cmp = LT_EXPR;
608 break;
611 return;
612 default:
613 return;
616 mpz_init (offc0);
617 mpz_init (offc1);
618 split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
619 split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
621 /* We are only interested in comparisons of expressions based on VARX and
622 VARY. TODO -- we might also be able to derive some bounds from
623 expressions containing just one of the variables. */
625 if (operand_equal_p (varx, varc1, 0))
627 std::swap (varc0, varc1);
628 mpz_swap (offc0, offc1);
629 cmp = swap_tree_comparison (cmp);
632 if (!operand_equal_p (varx, varc0, 0)
633 || !operand_equal_p (vary, varc1, 0))
634 goto end;
636 mpz_init_set (loffx, offx);
637 mpz_init_set (loffy, offy);
639 if (cmp == GT_EXPR || cmp == GE_EXPR)
641 std::swap (varx, vary);
642 mpz_swap (offc0, offc1);
643 mpz_swap (loffx, loffy);
644 cmp = swap_tree_comparison (cmp);
645 lbound = true;
648 /* If there is no overflow, the condition implies that
650 (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).
652 The overflows and underflows may complicate things a bit; each
653 overflow decreases the appropriate offset by M, and underflow
654 increases it by M. The above inequality would not necessarily be
655 true if
657 -- VARX + OFFX underflows and VARX + OFFC0 does not, or
658 VARX + OFFC0 overflows, but VARX + OFFX does not.
659 This may only happen if OFFX < OFFC0.
660 -- VARY + OFFY overflows and VARY + OFFC1 does not, or
661 VARY + OFFC1 underflows and VARY + OFFY does not.
662 This may only happen if OFFY > OFFC1. */
664 if (no_wrap)
666 x_ok = true;
667 y_ok = true;
669 else
671 x_ok = (integer_zerop (varx)
672 || mpz_cmp (loffx, offc0) >= 0);
673 y_ok = (integer_zerop (vary)
674 || mpz_cmp (loffy, offc1) <= 0);
677 if (x_ok && y_ok)
679 mpz_init (bnd);
680 mpz_sub (bnd, loffx, loffy);
681 mpz_add (bnd, bnd, offc1);
682 mpz_sub (bnd, bnd, offc0);
684 if (cmp == LT_EXPR)
685 mpz_sub_ui (bnd, bnd, 1);
687 if (lbound)
689 mpz_neg (bnd, bnd);
690 if (mpz_cmp (bnds->below, bnd) < 0)
691 mpz_set (bnds->below, bnd);
693 else
695 if (mpz_cmp (bnd, bnds->up) < 0)
696 mpz_set (bnds->up, bnd);
698 mpz_clear (bnd);
701 mpz_clear (loffx);
702 mpz_clear (loffy);
703 end:
704 mpz_clear (offc0);
705 mpz_clear (offc1);
708 /* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
709 The subtraction is considered to be performed in arbitrary precision,
710 without overflows.
712 We do not attempt to be too clever regarding the value ranges of X and
713 Y; most of the time, they are just integers or ssa names offsetted by
714 integer. However, we try to use the information contained in the
715 comparisons before the loop (usually created by loop header copying). */
717 static void
718 bound_difference (struct loop *loop, tree x, tree y, bounds *bnds)
720 tree type = TREE_TYPE (x);
721 tree varx, vary;
722 mpz_t offx, offy;
723 mpz_t minx, maxx, miny, maxy;
724 int cnt = 0;
725 edge e;
726 basic_block bb;
727 tree c0, c1;
728 gimple *cond;
729 enum tree_code cmp;
731 /* Get rid of unnecessary casts, but preserve the value of
732 the expressions. */
733 STRIP_SIGN_NOPS (x);
734 STRIP_SIGN_NOPS (y);
736 mpz_init (bnds->below);
737 mpz_init (bnds->up);
738 mpz_init (offx);
739 mpz_init (offy);
740 split_to_var_and_offset (x, &varx, offx);
741 split_to_var_and_offset (y, &vary, offy);
743 if (!integer_zerop (varx)
744 && operand_equal_p (varx, vary, 0))
746 /* Special case VARX == VARY -- we just need to compare the
747 offsets. The matters are a bit more complicated in the
748 case addition of offsets may wrap. */
749 bound_difference_of_offsetted_base (type, offx, offy, bnds);
751 else
753 /* Otherwise, use the value ranges to determine the initial
754 estimates on below and up. */
755 mpz_init (minx);
756 mpz_init (maxx);
757 mpz_init (miny);
758 mpz_init (maxy);
759 determine_value_range (loop, type, varx, offx, minx, maxx);
760 determine_value_range (loop, type, vary, offy, miny, maxy);
762 mpz_sub (bnds->below, minx, maxy);
763 mpz_sub (bnds->up, maxx, miny);
764 mpz_clear (minx);
765 mpz_clear (maxx);
766 mpz_clear (miny);
767 mpz_clear (maxy);
770 /* If both X and Y are constants, we cannot get any more precise. */
771 if (integer_zerop (varx) && integer_zerop (vary))
772 goto end;
774 /* Now walk the dominators of the loop header and use the entry
775 guards to refine the estimates. */
776 for (bb = loop->header;
777 bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
778 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
780 if (!single_pred_p (bb))
781 continue;
782 e = single_pred_edge (bb);
784 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
785 continue;
787 cond = last_stmt (e->src);
788 c0 = gimple_cond_lhs (cond);
789 cmp = gimple_cond_code (cond);
790 c1 = gimple_cond_rhs (cond);
792 if (e->flags & EDGE_FALSE_VALUE)
793 cmp = invert_tree_comparison (cmp, false);
795 refine_bounds_using_guard (type, varx, offx, vary, offy,
796 c0, cmp, c1, bnds);
797 ++cnt;
800 end:
801 mpz_clear (offx);
802 mpz_clear (offy);
805 /* Update the bounds in BNDS that restrict the value of X to the bounds
806 that restrict the value of X + DELTA. X can be obtained as a
807 difference of two values in TYPE. */
809 static void
810 bounds_add (bounds *bnds, const widest_int &delta, tree type)
812 mpz_t mdelta, max;
814 mpz_init (mdelta);
815 wi::to_mpz (delta, mdelta, SIGNED);
817 mpz_init (max);
818 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
820 mpz_add (bnds->up, bnds->up, mdelta);
821 mpz_add (bnds->below, bnds->below, mdelta);
823 if (mpz_cmp (bnds->up, max) > 0)
824 mpz_set (bnds->up, max);
826 mpz_neg (max, max);
827 if (mpz_cmp (bnds->below, max) < 0)
828 mpz_set (bnds->below, max);
830 mpz_clear (mdelta);
831 mpz_clear (max);
834 /* Update the bounds in BNDS that restrict the value of X to the bounds
835 that restrict the value of -X. */
837 static void
838 bounds_negate (bounds *bnds)
840 mpz_t tmp;
842 mpz_init_set (tmp, bnds->up);
843 mpz_neg (bnds->up, bnds->below);
844 mpz_neg (bnds->below, tmp);
845 mpz_clear (tmp);
848 /* Returns inverse of X modulo 2^s, where MASK = 2^s-1. */
850 static tree
851 inverse (tree x, tree mask)
853 tree type = TREE_TYPE (x);
854 tree rslt;
855 unsigned ctr = tree_floor_log2 (mask);
857 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT)
859 unsigned HOST_WIDE_INT ix;
860 unsigned HOST_WIDE_INT imask;
861 unsigned HOST_WIDE_INT irslt = 1;
863 gcc_assert (cst_and_fits_in_hwi (x));
864 gcc_assert (cst_and_fits_in_hwi (mask));
866 ix = int_cst_value (x);
867 imask = int_cst_value (mask);
869 for (; ctr; ctr--)
871 irslt *= ix;
872 ix *= ix;
874 irslt &= imask;
876 rslt = build_int_cst_type (type, irslt);
878 else
880 rslt = build_int_cst (type, 1);
881 for (; ctr; ctr--)
883 rslt = int_const_binop (MULT_EXPR, rslt, x);
884 x = int_const_binop (MULT_EXPR, x, x);
886 rslt = int_const_binop (BIT_AND_EXPR, rslt, mask);
889 return rslt;
892 /* Derives the upper bound BND on the number of executions of loop with exit
893 condition S * i <> C. If NO_OVERFLOW is true, then the control variable of
894 the loop does not overflow. EXIT_MUST_BE_TAKEN is true if we are guaranteed
895 that the loop ends through this exit, i.e., the induction variable ever
896 reaches the value of C.
898 The value C is equal to final - base, where final and base are the final and
899 initial value of the actual induction variable in the analysed loop. BNDS
900 bounds the value of this difference when computed in signed type with
901 unbounded range, while the computation of C is performed in an unsigned
902 type with the range matching the range of the type of the induction variable.
903 In particular, BNDS.up contains an upper bound on C in the following cases:
904 -- if the iv must reach its final value without overflow, i.e., if
905 NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
906 -- if final >= base, which we know to hold when BNDS.below >= 0. */
908 static void
909 number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
910 bounds *bnds, bool exit_must_be_taken)
912 widest_int max;
913 mpz_t d;
914 tree type = TREE_TYPE (c);
915 bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
916 || mpz_sgn (bnds->below) >= 0);
918 if (integer_onep (s)
919 || (TREE_CODE (c) == INTEGER_CST
920 && TREE_CODE (s) == INTEGER_CST
921 && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0)
922 || (TYPE_OVERFLOW_UNDEFINED (type)
923 && multiple_of_p (type, c, s)))
925 /* If C is an exact multiple of S, then its value will be reached before
926 the induction variable overflows (unless the loop is exited in some
927 other way before). Note that the actual induction variable in the
928 loop (which ranges from base to final instead of from 0 to C) may
929 overflow, in which case BNDS.up will not be giving a correct upper
930 bound on C; thus, BNDS_U_VALID had to be computed in advance. */
931 no_overflow = true;
932 exit_must_be_taken = true;
935 /* If the induction variable can overflow, the number of iterations is at
936 most the period of the control variable (or infinite, but in that case
937 the whole # of iterations analysis will fail). */
938 if (!no_overflow)
940 max = wi::mask <widest_int> (TYPE_PRECISION (type) - wi::ctz (s), false);
941 wi::to_mpz (max, bnd, UNSIGNED);
942 return;
945 /* Now we know that the induction variable does not overflow, so the loop
946 iterates at most (range of type / S) times. */
947 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);
949 /* If the induction variable is guaranteed to reach the value of C before
950 overflow, ... */
951 if (exit_must_be_taken)
953 /* ... then we can strengthen this to C / S, and possibly we can use
954 the upper bound on C given by BNDS. */
955 if (TREE_CODE (c) == INTEGER_CST)
956 wi::to_mpz (c, bnd, UNSIGNED);
957 else if (bnds_u_valid)
958 mpz_set (bnd, bnds->up);
961 mpz_init (d);
962 wi::to_mpz (s, d, UNSIGNED);
963 mpz_fdiv_q (bnd, bnd, d);
964 mpz_clear (d);
967 /* Determines number of iterations of loop whose ending condition
968 is IV <> FINAL. TYPE is the type of the iv. The number of
969 iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
970 we know that the exit must be taken eventually, i.e., that the IV
971 ever reaches the value FINAL (we derived this earlier, and possibly set
972 NITER->assumptions to make sure this is the case). BNDS contains the
973 bounds on the difference FINAL - IV->base. */
975 static bool
976 number_of_iterations_ne (tree type, affine_iv *iv, tree final,
977 struct tree_niter_desc *niter, bool exit_must_be_taken,
978 bounds *bnds)
980 tree niter_type = unsigned_type_for (type);
981 tree s, c, d, bits, assumption, tmp, bound;
982 mpz_t max;
984 niter->control = *iv;
985 niter->bound = final;
986 niter->cmp = NE_EXPR;
988 /* Rearrange the terms so that we get inequality S * i <> C, with S
989 positive. Also cast everything to the unsigned type. If IV does
990 not overflow, BNDS bounds the value of C. Also, this is the
991 case if the computation |FINAL - IV->base| does not overflow, i.e.,
992 if BNDS->below in the result is nonnegative. */
993 if (tree_int_cst_sign_bit (iv->step))
995 s = fold_convert (niter_type,
996 fold_build1 (NEGATE_EXPR, type, iv->step));
997 c = fold_build2 (MINUS_EXPR, niter_type,
998 fold_convert (niter_type, iv->base),
999 fold_convert (niter_type, final));
1000 bounds_negate (bnds);
1002 else
1004 s = fold_convert (niter_type, iv->step);
1005 c = fold_build2 (MINUS_EXPR, niter_type,
1006 fold_convert (niter_type, final),
1007 fold_convert (niter_type, iv->base));
1010 mpz_init (max);
1011 number_of_iterations_ne_max (max, iv->no_overflow, c, s, bnds,
1012 exit_must_be_taken);
1013 niter->max = widest_int::from (wi::from_mpz (niter_type, max, false),
1014 TYPE_SIGN (niter_type));
1015 mpz_clear (max);
1017 /* First the trivial cases -- when the step is 1. */
1018 if (integer_onep (s))
1020 niter->niter = c;
1021 return true;
1024 /* Let nsd (step, size of mode) = d. If d does not divide c, the loop
1025 is infinite. Otherwise, the number of iterations is
1026 (inverse(s/d) * (c/d)) mod (size of mode/d). */
1027 bits = num_ending_zeros (s);
1028 bound = build_low_bits_mask (niter_type,
1029 (TYPE_PRECISION (niter_type)
1030 - tree_to_uhwi (bits)));
1032 d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
1033 build_int_cst (niter_type, 1), bits);
1034 s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);
1036 if (!exit_must_be_taken)
1038 /* If we cannot assume that the exit is taken eventually, record the
1039 assumptions for divisibility of c. */
1040 assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
1041 assumption = fold_build2 (EQ_EXPR, boolean_type_node,
1042 assumption, build_int_cst (niter_type, 0));
1043 if (!integer_nonzerop (assumption))
1044 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1045 niter->assumptions, assumption);
1048 c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
1049 tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
1050 niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
1051 return true;
1054 /* Checks whether we can determine the final value of the control variable
1055 of the loop with ending condition IV0 < IV1 (computed in TYPE).
1056 DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
1057 of the step. The assumptions necessary to ensure that the computation
1058 of the final value does not overflow are recorded in NITER. If we
1059 find the final value, we adjust DELTA and return TRUE. Otherwise
1060 we return false. BNDS bounds the value of IV1->base - IV0->base,
1061 and will be updated by the same amount as DELTA. EXIT_MUST_BE_TAKEN is
1062 true if we know that the exit must be taken eventually. */
1064 static bool
1065 number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
1066 struct tree_niter_desc *niter,
1067 tree *delta, tree step,
1068 bool exit_must_be_taken, bounds *bnds)
1070 tree niter_type = TREE_TYPE (step);
1071 tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
1072 tree tmod;
1073 mpz_t mmod;
1074 tree assumption = boolean_true_node, bound, noloop;
1075 bool ret = false, fv_comp_no_overflow;
1076 tree type1 = type;
1077 if (POINTER_TYPE_P (type))
1078 type1 = sizetype;
1080 if (TREE_CODE (mod) != INTEGER_CST)
1081 return false;
1082 if (integer_nonzerop (mod))
1083 mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
1084 tmod = fold_convert (type1, mod);
1086 mpz_init (mmod);
1087 wi::to_mpz (mod, mmod, UNSIGNED);
1088 mpz_neg (mmod, mmod);
1090 /* If the induction variable does not overflow and the exit is taken,
1091 then the computation of the final value does not overflow. This is
1092 also obviously the case if the new final value is equal to the
1093 current one. Finally, we postulate this for pointer type variables,
1094 as the code cannot rely on the object to that the pointer points being
1095 placed at the end of the address space (and more pragmatically,
1096 TYPE_{MIN,MAX}_VALUE is not defined for pointers). */
1097 if (integer_zerop (mod) || POINTER_TYPE_P (type))
1098 fv_comp_no_overflow = true;
1099 else if (!exit_must_be_taken)
1100 fv_comp_no_overflow = false;
1101 else
1102 fv_comp_no_overflow =
1103 (iv0->no_overflow && integer_nonzerop (iv0->step))
1104 || (iv1->no_overflow && integer_nonzerop (iv1->step));
1106 if (integer_nonzerop (iv0->step))
1108 /* The final value of the iv is iv1->base + MOD, assuming that this
1109 computation does not overflow, and that
1110 iv0->base <= iv1->base + MOD. */
1111 if (!fv_comp_no_overflow)
1113 bound = fold_build2 (MINUS_EXPR, type1,
1114 TYPE_MAX_VALUE (type1), tmod);
1115 assumption = fold_build2 (LE_EXPR, boolean_type_node,
1116 iv1->base, bound);
1117 if (integer_zerop (assumption))
1118 goto end;
1120 if (mpz_cmp (mmod, bnds->below) < 0)
1121 noloop = boolean_false_node;
1122 else if (POINTER_TYPE_P (type))
1123 noloop = fold_build2 (GT_EXPR, boolean_type_node,
1124 iv0->base,
1125 fold_build_pointer_plus (iv1->base, tmod));
1126 else
1127 noloop = fold_build2 (GT_EXPR, boolean_type_node,
1128 iv0->base,
1129 fold_build2 (PLUS_EXPR, type1,
1130 iv1->base, tmod));
1132 else
1134 /* The final value of the iv is iv0->base - MOD, assuming that this
1135 computation does not overflow, and that
1136 iv0->base - MOD <= iv1->base. */
1137 if (!fv_comp_no_overflow)
1139 bound = fold_build2 (PLUS_EXPR, type1,
1140 TYPE_MIN_VALUE (type1), tmod);
1141 assumption = fold_build2 (GE_EXPR, boolean_type_node,
1142 iv0->base, bound);
1143 if (integer_zerop (assumption))
1144 goto end;
1146 if (mpz_cmp (mmod, bnds->below) < 0)
1147 noloop = boolean_false_node;
1148 else if (POINTER_TYPE_P (type))
1149 noloop = fold_build2 (GT_EXPR, boolean_type_node,
1150 fold_build_pointer_plus (iv0->base,
1151 fold_build1 (NEGATE_EXPR,
1152 type1, tmod)),
1153 iv1->base);
1154 else
1155 noloop = fold_build2 (GT_EXPR, boolean_type_node,
1156 fold_build2 (MINUS_EXPR, type1,
1157 iv0->base, tmod),
1158 iv1->base);
1161 if (!integer_nonzerop (assumption))
1162 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1163 niter->assumptions,
1164 assumption);
1165 if (!integer_zerop (noloop))
1166 niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1167 niter->may_be_zero,
1168 noloop);
1169 bounds_add (bnds, wi::to_widest (mod), type);
1170 *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);
1172 ret = true;
1173 end:
1174 mpz_clear (mmod);
1175 return ret;
1178 /* Add assertions to NITER that ensure that the control variable of the loop
1179 with ending condition IV0 < IV1 does not overflow. Types of IV0 and IV1
1180 are TYPE. Returns false if we can prove that there is an overflow, true
1181 otherwise. STEP is the absolute value of the step. */
1183 static bool
1184 assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
1185 struct tree_niter_desc *niter, tree step)
1187 tree bound, d, assumption, diff;
1188 tree niter_type = TREE_TYPE (step);
1190 if (integer_nonzerop (iv0->step))
1192 /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
1193 if (iv0->no_overflow)
1194 return true;
1196 /* If iv0->base is a constant, we can determine the last value before
1197 overflow precisely; otherwise we conservatively assume
1198 MAX - STEP + 1. */
1200 if (TREE_CODE (iv0->base) == INTEGER_CST)
1202 d = fold_build2 (MINUS_EXPR, niter_type,
1203 fold_convert (niter_type, TYPE_MAX_VALUE (type)),
1204 fold_convert (niter_type, iv0->base));
1205 diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
1207 else
1208 diff = fold_build2 (MINUS_EXPR, niter_type, step,
1209 build_int_cst (niter_type, 1));
1210 bound = fold_build2 (MINUS_EXPR, type,
1211 TYPE_MAX_VALUE (type), fold_convert (type, diff));
1212 assumption = fold_build2 (LE_EXPR, boolean_type_node,
1213 iv1->base, bound);
1215 else
1217 /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
1218 if (iv1->no_overflow)
1219 return true;
1221 if (TREE_CODE (iv1->base) == INTEGER_CST)
1223 d = fold_build2 (MINUS_EXPR, niter_type,
1224 fold_convert (niter_type, iv1->base),
1225 fold_convert (niter_type, TYPE_MIN_VALUE (type)));
1226 diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
1228 else
1229 diff = fold_build2 (MINUS_EXPR, niter_type, step,
1230 build_int_cst (niter_type, 1));
1231 bound = fold_build2 (PLUS_EXPR, type,
1232 TYPE_MIN_VALUE (type), fold_convert (type, diff));
1233 assumption = fold_build2 (GE_EXPR, boolean_type_node,
1234 iv0->base, bound);
1237 if (integer_zerop (assumption))
1238 return false;
1239 if (!integer_nonzerop (assumption))
1240 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1241 niter->assumptions, assumption);
1243 iv0->no_overflow = true;
1244 iv1->no_overflow = true;
1245 return true;
1248 /* Add an assumption to NITER that a loop whose ending condition
1249 is IV0 < IV1 rolls. TYPE is the type of the control iv. BNDS
1250 bounds the value of IV1->base - IV0->base. */
1252 static void
1253 assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
1254 struct tree_niter_desc *niter, bounds *bnds)
1256 tree assumption = boolean_true_node, bound, diff;
1257 tree mbz, mbzl, mbzr, type1;
1258 bool rolls_p, no_overflow_p;
1259 widest_int dstep;
1260 mpz_t mstep, max;
1262 /* We are going to compute the number of iterations as
1263 (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
1264 variant of TYPE. This formula only works if
1266 -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1
1268 (where MAX is the maximum value of the unsigned variant of TYPE, and
1269 the computations in this formula are performed in full precision,
1270 i.e., without overflows).
1272 Usually, for loops with exit condition iv0->base + step * i < iv1->base,
1273 we have a condition of the form iv0->base - step < iv1->base before the loop,
1274 and for loops iv0->base < iv1->base - step * i the condition
1275 iv0->base < iv1->base + step, due to loop header copying, which enable us
1276 to prove the lower bound.
1278 The upper bound is more complicated. Unless the expressions for initial
1279 and final value themselves contain enough information, we usually cannot
1280 derive it from the context. */
1282 /* First check whether the answer does not follow from the bounds we gathered
1283 before. */
1284 if (integer_nonzerop (iv0->step))
1285 dstep = wi::to_widest (iv0->step);
1286 else
1288 dstep = wi::sext (wi::to_widest (iv1->step), TYPE_PRECISION (type));
1289 dstep = -dstep;
1292 mpz_init (mstep);
1293 wi::to_mpz (dstep, mstep, UNSIGNED);
1294 mpz_neg (mstep, mstep);
1295 mpz_add_ui (mstep, mstep, 1);
1297 rolls_p = mpz_cmp (mstep, bnds->below) <= 0;
1299 mpz_init (max);
1300 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
1301 mpz_add (max, max, mstep);
1302 no_overflow_p = (mpz_cmp (bnds->up, max) <= 0
1303 /* For pointers, only values lying inside a single object
1304 can be compared or manipulated by pointer arithmetics.
1305 Gcc in general does not allow or handle objects larger
1306 than half of the address space, hence the upper bound
1307 is satisfied for pointers. */
1308 || POINTER_TYPE_P (type));
1309 mpz_clear (mstep);
1310 mpz_clear (max);
1312 if (rolls_p && no_overflow_p)
1313 return;
1315 type1 = type;
1316 if (POINTER_TYPE_P (type))
1317 type1 = sizetype;
1319 /* Now the hard part; we must formulate the assumption(s) as expressions, and
1320 we must be careful not to introduce overflow. */
1322 if (integer_nonzerop (iv0->step))
1324 diff = fold_build2 (MINUS_EXPR, type1,
1325 iv0->step, build_int_cst (type1, 1));
1327 /* We need to know that iv0->base >= MIN + iv0->step - 1. Since
1328 0 address never belongs to any object, we can assume this for
1329 pointers. */
1330 if (!POINTER_TYPE_P (type))
1332 bound = fold_build2 (PLUS_EXPR, type1,
1333 TYPE_MIN_VALUE (type), diff);
1334 assumption = fold_build2 (GE_EXPR, boolean_type_node,
1335 iv0->base, bound);
1338 /* And then we can compute iv0->base - diff, and compare it with
1339 iv1->base. */
1340 mbzl = fold_build2 (MINUS_EXPR, type1,
1341 fold_convert (type1, iv0->base), diff);
1342 mbzr = fold_convert (type1, iv1->base);
1344 else
1346 diff = fold_build2 (PLUS_EXPR, type1,
1347 iv1->step, build_int_cst (type1, 1));
1349 if (!POINTER_TYPE_P (type))
1351 bound = fold_build2 (PLUS_EXPR, type1,
1352 TYPE_MAX_VALUE (type), diff);
1353 assumption = fold_build2 (LE_EXPR, boolean_type_node,
1354 iv1->base, bound);
1357 mbzl = fold_convert (type1, iv0->base);
1358 mbzr = fold_build2 (MINUS_EXPR, type1,
1359 fold_convert (type1, iv1->base), diff);
1362 if (!integer_nonzerop (assumption))
1363 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1364 niter->assumptions, assumption);
1365 if (!rolls_p)
1367 mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
1368 niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1369 niter->may_be_zero, mbz);
1373 /* Determines number of iterations of loop whose ending condition
1374 is IV0 < IV1. TYPE is the type of the iv. The number of
1375 iterations is stored to NITER. BNDS bounds the difference
1376 IV1->base - IV0->base. EXIT_MUST_BE_TAKEN is true if we know
1377 that the exit must be taken eventually. */
1379 static bool
1380 number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
1381 struct tree_niter_desc *niter,
1382 bool exit_must_be_taken, bounds *bnds)
1384 tree niter_type = unsigned_type_for (type);
1385 tree delta, step, s;
1386 mpz_t mstep, tmp;
1388 if (integer_nonzerop (iv0->step))
1390 niter->control = *iv0;
1391 niter->cmp = LT_EXPR;
1392 niter->bound = iv1->base;
1394 else
1396 niter->control = *iv1;
1397 niter->cmp = GT_EXPR;
1398 niter->bound = iv0->base;
1401 delta = fold_build2 (MINUS_EXPR, niter_type,
1402 fold_convert (niter_type, iv1->base),
1403 fold_convert (niter_type, iv0->base));
1405 /* First handle the special case that the step is +-1. */
1406 if ((integer_onep (iv0->step) && integer_zerop (iv1->step))
1407 || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step)))
1409 /* for (i = iv0->base; i < iv1->base; i++)
1413 for (i = iv1->base; i > iv0->base; i--).
1415 In both cases # of iterations is iv1->base - iv0->base, assuming that
1416 iv1->base >= iv0->base.
1418 First try to derive a lower bound on the value of
1419 iv1->base - iv0->base, computed in full precision. If the difference
1420 is nonnegative, we are done, otherwise we must record the
1421 condition. */
1423 if (mpz_sgn (bnds->below) < 0)
1424 niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
1425 iv1->base, iv0->base);
1426 niter->niter = delta;
1427 niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
1428 TYPE_SIGN (niter_type));
1429 niter->control.no_overflow = true;
1430 return true;
1433 if (integer_nonzerop (iv0->step))
1434 step = fold_convert (niter_type, iv0->step);
1435 else
1436 step = fold_convert (niter_type,
1437 fold_build1 (NEGATE_EXPR, type, iv1->step));
1439 /* If we can determine the final value of the control iv exactly, we can
1440 transform the condition to != comparison. In particular, this will be
1441 the case if DELTA is constant. */
1442 if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, &delta, step,
1443 exit_must_be_taken, bnds))
1445 affine_iv zps;
1447 zps.base = build_int_cst (niter_type, 0);
1448 zps.step = step;
1449 /* number_of_iterations_lt_to_ne will add assumptions that ensure that
1450 zps does not overflow. */
1451 zps.no_overflow = true;
1453 return number_of_iterations_ne (type, &zps, delta, niter, true, bnds);
1456 /* Make sure that the control iv does not overflow. */
1457 if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
1458 return false;
1460 /* We determine the number of iterations as (delta + step - 1) / step. For
1461 this to work, we must know that iv1->base >= iv0->base - step + 1,
1462 otherwise the loop does not roll. */
1463 assert_loop_rolls_lt (type, iv0, iv1, niter, bnds);
1465 s = fold_build2 (MINUS_EXPR, niter_type,
1466 step, build_int_cst (niter_type, 1));
1467 delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
1468 niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);
1470 mpz_init (mstep);
1471 mpz_init (tmp);
1472 wi::to_mpz (step, mstep, UNSIGNED);
1473 mpz_add (tmp, bnds->up, mstep);
1474 mpz_sub_ui (tmp, tmp, 1);
1475 mpz_fdiv_q (tmp, tmp, mstep);
1476 niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
1477 TYPE_SIGN (niter_type));
1478 mpz_clear (mstep);
1479 mpz_clear (tmp);
1481 return true;
1484 /* Determines number of iterations of loop whose ending condition
1485 is IV0 <= IV1. TYPE is the type of the iv. The number of
1486 iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
1487 we know that this condition must eventually become false (we derived this
1488 earlier, and possibly set NITER->assumptions to make sure this
1489 is the case). BNDS bounds the difference IV1->base - IV0->base. */
1491 static bool
1492 number_of_iterations_le (tree type, affine_iv *iv0, affine_iv *iv1,
1493 struct tree_niter_desc *niter, bool exit_must_be_taken,
1494 bounds *bnds)
1496 tree assumption;
1497 tree type1 = type;
1498 if (POINTER_TYPE_P (type))
1499 type1 = sizetype;
1501 /* Say that IV0 is the control variable. Then IV0 <= IV1 iff
1502 IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
1503 value of the type. This we must know anyway, since if it is
1504 equal to this value, the loop rolls forever. We do not check
1505 this condition for pointer type ivs, as the code cannot rely on
1506 the object to that the pointer points being placed at the end of
1507 the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
1508 not defined for pointers). */
1510 if (!exit_must_be_taken && !POINTER_TYPE_P (type))
1512 if (integer_nonzerop (iv0->step))
1513 assumption = fold_build2 (NE_EXPR, boolean_type_node,
1514 iv1->base, TYPE_MAX_VALUE (type));
1515 else
1516 assumption = fold_build2 (NE_EXPR, boolean_type_node,
1517 iv0->base, TYPE_MIN_VALUE (type));
1519 if (integer_zerop (assumption))
1520 return false;
1521 if (!integer_nonzerop (assumption))
1522 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1523 niter->assumptions, assumption);
1526 if (integer_nonzerop (iv0->step))
1528 if (POINTER_TYPE_P (type))
1529 iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1);
1530 else
1531 iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base,
1532 build_int_cst (type1, 1));
1534 else if (POINTER_TYPE_P (type))
1535 iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1);
1536 else
1537 iv0->base = fold_build2 (MINUS_EXPR, type1,
1538 iv0->base, build_int_cst (type1, 1));
1540 bounds_add (bnds, 1, type1);
1542 return number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken,
1543 bnds);
1546 /* Dumps description of affine induction variable IV to FILE. */
1548 static void
1549 dump_affine_iv (FILE *file, affine_iv *iv)
1551 if (!integer_zerop (iv->step))
1552 fprintf (file, "[");
1554 print_generic_expr (dump_file, iv->base, TDF_SLIM);
1556 if (!integer_zerop (iv->step))
1558 fprintf (file, ", + , ");
1559 print_generic_expr (dump_file, iv->step, TDF_SLIM);
1560 fprintf (file, "]%s", iv->no_overflow ? "(no_overflow)" : "");
1564 /* Determine the number of iterations according to condition (for staying
1565 inside loop) which compares two induction variables using comparison
1566 operator CODE. The induction variable on left side of the comparison
1567 is IV0, the right-hand side is IV1. Both induction variables must have
1568 type TYPE, which must be an integer or pointer type. The steps of the
1569 ivs must be constants (or NULL_TREE, which is interpreted as constant zero).
1571 LOOP is the loop whose number of iterations we are determining.
1573 ONLY_EXIT is true if we are sure this is the only way the loop could be
1574 exited (including possibly non-returning function calls, exceptions, etc.)
1575 -- in this case we can use the information whether the control induction
1576 variables can overflow or not in a more efficient way.
1578 if EVERY_ITERATION is true, we know the test is executed on every iteration.
1580 The results (number of iterations and assumptions as described in
1581 comments at struct tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
1582 Returns false if it fails to determine number of iterations, true if it
1583 was determined (possibly with some assumptions). */
1585 static bool
1586 number_of_iterations_cond (struct loop *loop,
1587 tree type, affine_iv *iv0, enum tree_code code,
1588 affine_iv *iv1, struct tree_niter_desc *niter,
1589 bool only_exit, bool every_iteration)
1591 bool exit_must_be_taken = false, ret;
1592 bounds bnds;
1594 /* If the test is not executed every iteration, wrapping may make the test
1595 to pass again.
1596 TODO: the overflow case can be still used as unreliable estimate of upper
1597 bound. But we have no API to pass it down to number of iterations code
1598 and, at present, it will not use it anyway. */
1599 if (!every_iteration
1600 && (!iv0->no_overflow || !iv1->no_overflow
1601 || code == NE_EXPR || code == EQ_EXPR))
1602 return false;
1604 /* The meaning of these assumptions is this:
1605 if !assumptions
1606 then the rest of information does not have to be valid
1607 if may_be_zero then the loop does not roll, even if
1608 niter != 0. */
1609 niter->assumptions = boolean_true_node;
1610 niter->may_be_zero = boolean_false_node;
1611 niter->niter = NULL_TREE;
1612 niter->max = 0;
1613 niter->bound = NULL_TREE;
1614 niter->cmp = ERROR_MARK;
1616 /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
1617 the control variable is on lhs. */
1618 if (code == GE_EXPR || code == GT_EXPR
1619 || (code == NE_EXPR && integer_zerop (iv0->step)))
1621 std::swap (iv0, iv1);
1622 code = swap_tree_comparison (code);
1625 if (POINTER_TYPE_P (type))
1627 /* Comparison of pointers is undefined unless both iv0 and iv1 point
1628 to the same object. If they do, the control variable cannot wrap
1629 (as wrap around the bounds of memory will never return a pointer
1630 that would be guaranteed to point to the same object, even if we
1631 avoid undefined behavior by casting to size_t and back). */
1632 iv0->no_overflow = true;
1633 iv1->no_overflow = true;
1636 /* If the control induction variable does not overflow and the only exit
1637 from the loop is the one that we analyze, we know it must be taken
1638 eventually. */
1639 if (only_exit)
1641 if (!integer_zerop (iv0->step) && iv0->no_overflow)
1642 exit_must_be_taken = true;
1643 else if (!integer_zerop (iv1->step) && iv1->no_overflow)
1644 exit_must_be_taken = true;
1647 /* We can handle the case when neither of the sides of the comparison is
1648 invariant, provided that the test is NE_EXPR. This rarely occurs in
1649 practice, but it is simple enough to manage. */
1650 if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1652 tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
1653 if (code != NE_EXPR)
1654 return false;
1656 iv0->step = fold_binary_to_constant (MINUS_EXPR, step_type,
1657 iv0->step, iv1->step);
1658 iv0->no_overflow = false;
1659 iv1->step = build_int_cst (step_type, 0);
1660 iv1->no_overflow = true;
1663 /* If the result of the comparison is a constant, the loop is weird. More
1664 precise handling would be possible, but the situation is not common enough
1665 to waste time on it. */
1666 if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
1667 return false;
1669 /* Ignore loops of while (i-- < 10) type. */
1670 if (code != NE_EXPR)
1672 if (iv0->step && tree_int_cst_sign_bit (iv0->step))
1673 return false;
1675 if (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))
1676 return false;
1679 /* If the loop exits immediately, there is nothing to do. */
1680 tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
1681 if (tem && integer_zerop (tem))
1683 niter->niter = build_int_cst (unsigned_type_for (type), 0);
1684 niter->max = 0;
1685 return true;
1688 /* OK, now we know we have a senseful loop. Handle several cases, depending
1689 on what comparison operator is used. */
1690 bound_difference (loop, iv1->base, iv0->base, &bnds);
1692 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file,
1695 "Analyzing # of iterations of loop %d\n", loop->num);
1697 fprintf (dump_file, " exit condition ");
1698 dump_affine_iv (dump_file, iv0);
1699 fprintf (dump_file, " %s ",
1700 code == NE_EXPR ? "!="
1701 : code == LT_EXPR ? "<"
1702 : "<=");
1703 dump_affine_iv (dump_file, iv1);
1704 fprintf (dump_file, "\n");
1706 fprintf (dump_file, " bounds on difference of bases: ");
1707 mpz_out_str (dump_file, 10, bnds.below);
1708 fprintf (dump_file, " ... ");
1709 mpz_out_str (dump_file, 10, bnds.up);
1710 fprintf (dump_file, "\n");
1713 switch (code)
1715 case NE_EXPR:
1716 gcc_assert (integer_zerop (iv1->step));
1717 ret = number_of_iterations_ne (type, iv0, iv1->base, niter,
1718 exit_must_be_taken, &bnds);
1719 break;
1721 case LT_EXPR:
1722 ret = number_of_iterations_lt (type, iv0, iv1, niter, exit_must_be_taken,
1723 &bnds);
1724 break;
1726 case LE_EXPR:
1727 ret = number_of_iterations_le (type, iv0, iv1, niter, exit_must_be_taken,
1728 &bnds);
1729 break;
1731 default:
1732 gcc_unreachable ();
1735 mpz_clear (bnds.up);
1736 mpz_clear (bnds.below);
1738 if (dump_file && (dump_flags & TDF_DETAILS))
1740 if (ret)
1742 fprintf (dump_file, " result:\n");
1743 if (!integer_nonzerop (niter->assumptions))
1745 fprintf (dump_file, " under assumptions ");
1746 print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
1747 fprintf (dump_file, "\n");
1750 if (!integer_zerop (niter->may_be_zero))
1752 fprintf (dump_file, " zero if ");
1753 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1754 fprintf (dump_file, "\n");
1757 fprintf (dump_file, " # of iterations ");
1758 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1759 fprintf (dump_file, ", bounded by ");
1760 print_decu (niter->max, dump_file);
1761 fprintf (dump_file, "\n");
1763 else
1764 fprintf (dump_file, " failed\n\n");
1766 return ret;
1769 /* Substitute NEW for OLD in EXPR and fold the result. */
1771 static tree
1772 simplify_replace_tree (tree expr, tree old, tree new_tree)
1774 unsigned i, n;
1775 tree ret = NULL_TREE, e, se;
1777 if (!expr)
1778 return NULL_TREE;
1780 /* Do not bother to replace constants. */
1781 if (CONSTANT_CLASS_P (old))
1782 return expr;
1784 if (expr == old
1785 || operand_equal_p (expr, old, 0))
1786 return unshare_expr (new_tree);
1788 if (!EXPR_P (expr))
1789 return expr;
1791 n = TREE_OPERAND_LENGTH (expr);
1792 for (i = 0; i < n; i++)
1794 e = TREE_OPERAND (expr, i);
1795 se = simplify_replace_tree (e, old, new_tree);
1796 if (e == se)
1797 continue;
1799 if (!ret)
1800 ret = copy_node (expr);
1802 TREE_OPERAND (ret, i) = se;
1805 return (ret ? fold (ret) : expr);
1808 /* Expand definitions of ssa names in EXPR as long as they are simple
1809 enough, and return the new expression. If STOP is specified, stop
1810 expanding if EXPR equals to it. */
1812 tree
1813 expand_simple_operations (tree expr, tree stop)
1815 unsigned i, n;
1816 tree ret = NULL_TREE, e, ee, e1;
1817 enum tree_code code;
1818 gimple *stmt;
1820 if (expr == NULL_TREE)
1821 return expr;
1823 if (is_gimple_min_invariant (expr))
1824 return expr;
1826 code = TREE_CODE (expr);
1827 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1829 n = TREE_OPERAND_LENGTH (expr);
1830 for (i = 0; i < n; i++)
1832 e = TREE_OPERAND (expr, i);
1833 ee = expand_simple_operations (e, stop);
1834 if (e == ee)
1835 continue;
1837 if (!ret)
1838 ret = copy_node (expr);
1840 TREE_OPERAND (ret, i) = ee;
1843 if (!ret)
1844 return expr;
1846 fold_defer_overflow_warnings ();
1847 ret = fold (ret);
1848 fold_undefer_and_ignore_overflow_warnings ();
1849 return ret;
1852 /* Stop if it's not ssa name or the one we don't want to expand. */
1853 if (TREE_CODE (expr) != SSA_NAME || expr == stop)
1854 return expr;
1856 stmt = SSA_NAME_DEF_STMT (expr);
1857 if (gimple_code (stmt) == GIMPLE_PHI)
1859 basic_block src, dest;
1861 if (gimple_phi_num_args (stmt) != 1)
1862 return expr;
1863 e = PHI_ARG_DEF (stmt, 0);
1865 /* Avoid propagating through loop exit phi nodes, which
1866 could break loop-closed SSA form restrictions. */
1867 dest = gimple_bb (stmt);
1868 src = single_pred (dest);
1869 if (TREE_CODE (e) == SSA_NAME
1870 && src->loop_father != dest->loop_father)
1871 return expr;
1873 return expand_simple_operations (e, stop);
1875 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1876 return expr;
1878 /* Avoid expanding to expressions that contain SSA names that need
1879 to take part in abnormal coalescing. */
1880 ssa_op_iter iter;
1881 FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
1882 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
1883 return expr;
1885 e = gimple_assign_rhs1 (stmt);
1886 code = gimple_assign_rhs_code (stmt);
1887 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1889 if (is_gimple_min_invariant (e))
1890 return e;
1892 if (code == SSA_NAME)
1893 return expand_simple_operations (e, stop);
1895 return expr;
1898 switch (code)
1900 CASE_CONVERT:
1901 /* Casts are simple. */
1902 ee = expand_simple_operations (e, stop);
1903 return fold_build1 (code, TREE_TYPE (expr), ee);
1905 case PLUS_EXPR:
1906 case MINUS_EXPR:
1907 if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
1908 && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
1909 return expr;
1910 /* Fallthru. */
1911 case POINTER_PLUS_EXPR:
1912 /* And increments and decrements by a constant are simple. */
1913 e1 = gimple_assign_rhs2 (stmt);
1914 if (!is_gimple_min_invariant (e1))
1915 return expr;
1917 ee = expand_simple_operations (e, stop);
1918 return fold_build2 (code, TREE_TYPE (expr), ee, e1);
1920 default:
1921 return expr;
1925 /* Tries to simplify EXPR using the condition COND. Returns the simplified
1926 expression (or EXPR unchanged, if no simplification was possible). */
1928 static tree
1929 tree_simplify_using_condition_1 (tree cond, tree expr, tree stop)
1931 bool changed;
1932 tree e, te, e0, e1, e2, notcond;
1933 enum tree_code code = TREE_CODE (expr);
1935 if (code == INTEGER_CST)
1936 return expr;
1938 if (code == TRUTH_OR_EXPR
1939 || code == TRUTH_AND_EXPR
1940 || code == COND_EXPR)
1942 changed = false;
1944 e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0), stop);
1945 if (TREE_OPERAND (expr, 0) != e0)
1946 changed = true;
1948 e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1), stop);
1949 if (TREE_OPERAND (expr, 1) != e1)
1950 changed = true;
1952 if (code == COND_EXPR)
1954 e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2), stop);
1955 if (TREE_OPERAND (expr, 2) != e2)
1956 changed = true;
1958 else
1959 e2 = NULL_TREE;
1961 if (changed)
1963 if (code == COND_EXPR)
1964 expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
1965 else
1966 expr = fold_build2 (code, boolean_type_node, e0, e1);
1969 return expr;
1972 /* In case COND is equality, we may be able to simplify EXPR by copy/constant
1973 propagation, and vice versa. Fold does not handle this, since it is
1974 considered too expensive. */
1975 if (TREE_CODE (cond) == EQ_EXPR)
1977 e0 = TREE_OPERAND (cond, 0);
1978 e1 = TREE_OPERAND (cond, 1);
1980 /* We know that e0 == e1. Check whether we cannot simplify expr
1981 using this fact. */
1982 e = simplify_replace_tree (expr, e0, e1);
1983 if (integer_zerop (e) || integer_nonzerop (e))
1984 return e;
1986 e = simplify_replace_tree (expr, e1, e0);
1987 if (integer_zerop (e) || integer_nonzerop (e))
1988 return e;
1990 if (TREE_CODE (expr) == EQ_EXPR)
1992 e0 = TREE_OPERAND (expr, 0);
1993 e1 = TREE_OPERAND (expr, 1);
1995 /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true. */
1996 e = simplify_replace_tree (cond, e0, e1);
1997 if (integer_zerop (e))
1998 return e;
1999 e = simplify_replace_tree (cond, e1, e0);
2000 if (integer_zerop (e))
2001 return e;
2003 if (TREE_CODE (expr) == NE_EXPR)
2005 e0 = TREE_OPERAND (expr, 0);
2006 e1 = TREE_OPERAND (expr, 1);
2008 /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true. */
2009 e = simplify_replace_tree (cond, e0, e1);
2010 if (integer_zerop (e))
2011 return boolean_true_node;
2012 e = simplify_replace_tree (cond, e1, e0);
2013 if (integer_zerop (e))
2014 return boolean_true_node;
2017 te = expand_simple_operations (expr, stop);
2019 /* Check whether COND ==> EXPR. */
2020 notcond = invert_truthvalue (cond);
2021 e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, te);
2022 if (e && integer_nonzerop (e))
2023 return e;
2025 /* Check whether COND ==> not EXPR. */
2026 e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, te);
2027 if (e && integer_zerop (e))
2028 return e;
2030 return expr;
2033 /* Tries to simplify EXPR using the condition COND. Returns the simplified
2034 expression (or EXPR unchanged, if no simplification was possible).
2035 Wrapper around tree_simplify_using_condition_1 that ensures that chains
2036 of simple operations in definitions of ssa names in COND are expanded,
2037 so that things like casts or incrementing the value of the bound before
2038 the loop do not cause us to fail. */
2040 static tree
2041 tree_simplify_using_condition (tree cond, tree expr, tree stop)
2043 cond = expand_simple_operations (cond, stop);
2045 return tree_simplify_using_condition_1 (cond, expr, stop);
2048 /* Tries to simplify EXPR using the conditions on entry to LOOP.
2049 Returns the simplified expression (or EXPR unchanged, if no
2050 simplification was possible). */
2052 tree
2053 simplify_using_initial_conditions (struct loop *loop, tree expr, tree stop)
2055 edge e;
2056 basic_block bb;
2057 gimple *stmt;
2058 tree cond;
2059 int cnt = 0;
2061 if (TREE_CODE (expr) == INTEGER_CST)
2062 return expr;
2064 /* Limit walking the dominators to avoid quadraticness in
2065 the number of BBs times the number of loops in degenerate
2066 cases. */
2067 for (bb = loop->header;
2068 bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
2069 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
2071 if (!single_pred_p (bb))
2072 continue;
2073 e = single_pred_edge (bb);
2075 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2076 continue;
2078 stmt = last_stmt (e->src);
2079 cond = fold_build2 (gimple_cond_code (stmt),
2080 boolean_type_node,
2081 gimple_cond_lhs (stmt),
2082 gimple_cond_rhs (stmt));
2083 if (e->flags & EDGE_FALSE_VALUE)
2084 cond = invert_truthvalue (cond);
2085 expr = tree_simplify_using_condition (cond, expr, stop);
2086 /* Break if EXPR is simplified to const values. */
2087 if (expr && (integer_zerop (expr) || integer_nonzerop (expr)))
2088 break;
2090 ++cnt;
2093 return expr;
2096 /* Tries to simplify EXPR using the evolutions of the loop invariants
2097 in the superloops of LOOP. Returns the simplified expression
2098 (or EXPR unchanged, if no simplification was possible). */
2100 static tree
2101 simplify_using_outer_evolutions (struct loop *loop, tree expr)
2103 enum tree_code code = TREE_CODE (expr);
2104 bool changed;
2105 tree e, e0, e1, e2;
2107 if (is_gimple_min_invariant (expr))
2108 return expr;
2110 if (code == TRUTH_OR_EXPR
2111 || code == TRUTH_AND_EXPR
2112 || code == COND_EXPR)
2114 changed = false;
2116 e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0));
2117 if (TREE_OPERAND (expr, 0) != e0)
2118 changed = true;
2120 e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1));
2121 if (TREE_OPERAND (expr, 1) != e1)
2122 changed = true;
2124 if (code == COND_EXPR)
2126 e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2));
2127 if (TREE_OPERAND (expr, 2) != e2)
2128 changed = true;
2130 else
2131 e2 = NULL_TREE;
2133 if (changed)
2135 if (code == COND_EXPR)
2136 expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
2137 else
2138 expr = fold_build2 (code, boolean_type_node, e0, e1);
2141 return expr;
2144 e = instantiate_parameters (loop, expr);
2145 if (is_gimple_min_invariant (e))
2146 return e;
2148 return expr;
2151 /* Returns true if EXIT is the only possible exit from LOOP. */
2153 bool
2154 loop_only_exit_p (const struct loop *loop, const_edge exit)
2156 basic_block *body;
2157 gimple_stmt_iterator bsi;
2158 unsigned i;
2159 gimple *call;
2161 if (exit != single_exit (loop))
2162 return false;
2164 body = get_loop_body (loop);
2165 for (i = 0; i < loop->num_nodes; i++)
2167 for (bsi = gsi_start_bb (body[i]); !gsi_end_p (bsi); gsi_next (&bsi))
2169 call = gsi_stmt (bsi);
2170 if (gimple_code (call) != GIMPLE_CALL)
2171 continue;
2173 if (gimple_has_side_effects (call))
2175 free (body);
2176 return false;
2181 free (body);
2182 return true;
2185 /* Stores description of number of iterations of LOOP derived from
2186 EXIT (an exit edge of the LOOP) in NITER. Returns true if some
2187 useful information could be derived (and fields of NITER has
2188 meaning described in comments at struct tree_niter_desc
2189 declaration), false otherwise. If WARN is true and
2190 -Wunsafe-loop-optimizations was given, warn if the optimizer is going to use
2191 potentially unsafe assumptions.
2192 When EVERY_ITERATION is true, only tests that are known to be executed
2193 every iteration are considered (i.e. only test that alone bounds the loop).
2196 bool
2197 number_of_iterations_exit (struct loop *loop, edge exit,
2198 struct tree_niter_desc *niter,
2199 bool warn, bool every_iteration)
2201 gimple *last;
2202 gcond *stmt;
2203 tree type;
2204 tree op0, op1;
2205 enum tree_code code;
2206 affine_iv iv0, iv1;
2207 bool safe;
2209 safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
2211 if (every_iteration && !safe)
2212 return false;
2214 niter->assumptions = boolean_false_node;
2215 niter->control.base = NULL_TREE;
2216 niter->control.step = NULL_TREE;
2217 niter->control.no_overflow = false;
2218 last = last_stmt (exit->src);
2219 if (!last)
2220 return false;
2221 stmt = dyn_cast <gcond *> (last);
2222 if (!stmt)
2223 return false;
2225 /* We want the condition for staying inside loop. */
2226 code = gimple_cond_code (stmt);
2227 if (exit->flags & EDGE_TRUE_VALUE)
2228 code = invert_tree_comparison (code, false);
2230 switch (code)
2232 case GT_EXPR:
2233 case GE_EXPR:
2234 case LT_EXPR:
2235 case LE_EXPR:
2236 case NE_EXPR:
2237 break;
2239 default:
2240 return false;
2243 op0 = gimple_cond_lhs (stmt);
2244 op1 = gimple_cond_rhs (stmt);
2245 type = TREE_TYPE (op0);
2247 if (TREE_CODE (type) != INTEGER_TYPE
2248 && !POINTER_TYPE_P (type))
2249 return false;
2251 if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, false))
2252 return false;
2253 if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, false))
2254 return false;
2256 /* We don't want to see undefined signed overflow warnings while
2257 computing the number of iterations. */
2258 fold_defer_overflow_warnings ();
2260 iv0.base = expand_simple_operations (iv0.base);
2261 iv1.base = expand_simple_operations (iv1.base);
2262 if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
2263 loop_only_exit_p (loop, exit), safe))
2265 fold_undefer_and_ignore_overflow_warnings ();
2266 return false;
2269 if (optimize >= 3)
2271 niter->assumptions = simplify_using_outer_evolutions (loop,
2272 niter->assumptions);
2273 niter->may_be_zero = simplify_using_outer_evolutions (loop,
2274 niter->may_be_zero);
2275 niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
2278 niter->assumptions
2279 = simplify_using_initial_conditions (loop,
2280 niter->assumptions);
2281 niter->may_be_zero
2282 = simplify_using_initial_conditions (loop,
2283 niter->may_be_zero);
2285 fold_undefer_and_ignore_overflow_warnings ();
2287 /* If NITER has simplified into a constant, update MAX. */
2288 if (TREE_CODE (niter->niter) == INTEGER_CST)
2289 niter->max = wi::to_widest (niter->niter);
2291 if (integer_onep (niter->assumptions))
2292 return true;
2294 /* With -funsafe-loop-optimizations we assume that nothing bad can happen.
2295 But if we can prove that there is overflow or some other source of weird
2296 behavior, ignore the loop even with -funsafe-loop-optimizations. */
2297 if (integer_zerop (niter->assumptions) || !single_exit (loop))
2298 return false;
2300 if (flag_unsafe_loop_optimizations)
2301 niter->assumptions = boolean_true_node;
2303 if (warn)
2305 const char *wording;
2306 location_t loc = gimple_location (stmt);
2308 /* We can provide a more specific warning if one of the operator is
2309 constant and the other advances by +1 or -1. */
2310 if (!integer_zerop (iv1.step)
2311 ? (integer_zerop (iv0.step)
2312 && (integer_onep (iv1.step) || integer_all_onesp (iv1.step)))
2313 : (integer_onep (iv0.step) || integer_all_onesp (iv0.step)))
2314 wording =
2315 flag_unsafe_loop_optimizations
2316 ? N_("assuming that the loop is not infinite")
2317 : N_("cannot optimize possibly infinite loops");
2318 else
2319 wording =
2320 flag_unsafe_loop_optimizations
2321 ? N_("assuming that the loop counter does not overflow")
2322 : N_("cannot optimize loop, the loop counter may overflow");
2324 warning_at ((LOCATION_LINE (loc) > 0) ? loc : input_location,
2325 OPT_Wunsafe_loop_optimizations, "%s", gettext (wording));
2328 return flag_unsafe_loop_optimizations;
2331 /* Try to determine the number of iterations of LOOP. If we succeed,
2332 expression giving number of iterations is returned and *EXIT is
2333 set to the edge from that the information is obtained. Otherwise
2334 chrec_dont_know is returned. */
2336 tree
2337 find_loop_niter (struct loop *loop, edge *exit)
2339 unsigned i;
2340 vec<edge> exits = get_loop_exit_edges (loop);
2341 edge ex;
2342 tree niter = NULL_TREE, aniter;
2343 struct tree_niter_desc desc;
2345 *exit = NULL;
2346 FOR_EACH_VEC_ELT (exits, i, ex)
2348 if (!number_of_iterations_exit (loop, ex, &desc, false))
2349 continue;
2351 if (integer_nonzerop (desc.may_be_zero))
2353 /* We exit in the first iteration through this exit.
2354 We won't find anything better. */
2355 niter = build_int_cst (unsigned_type_node, 0);
2356 *exit = ex;
2357 break;
2360 if (!integer_zerop (desc.may_be_zero))
2361 continue;
2363 aniter = desc.niter;
2365 if (!niter)
2367 /* Nothing recorded yet. */
2368 niter = aniter;
2369 *exit = ex;
2370 continue;
2373 /* Prefer constants, the lower the better. */
2374 if (TREE_CODE (aniter) != INTEGER_CST)
2375 continue;
2377 if (TREE_CODE (niter) != INTEGER_CST)
2379 niter = aniter;
2380 *exit = ex;
2381 continue;
2384 if (tree_int_cst_lt (aniter, niter))
2386 niter = aniter;
2387 *exit = ex;
2388 continue;
2391 exits.release ();
2393 return niter ? niter : chrec_dont_know;
2396 /* Return true if loop is known to have bounded number of iterations. */
2398 bool
2399 finite_loop_p (struct loop *loop)
2401 widest_int nit;
2402 int flags;
2404 if (flag_unsafe_loop_optimizations)
2405 return true;
2406 flags = flags_from_decl_or_type (current_function_decl);
2407 if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE))
2409 if (dump_file && (dump_flags & TDF_DETAILS))
2410 fprintf (dump_file, "Found loop %i to be finite: it is within pure or const function.\n",
2411 loop->num);
2412 return true;
2415 if (loop->any_upper_bound
2416 || max_loop_iterations (loop, &nit))
2418 if (dump_file && (dump_flags & TDF_DETAILS))
2419 fprintf (dump_file, "Found loop %i to be finite: upper bound found.\n",
2420 loop->num);
2421 return true;
2423 return false;
2428 Analysis of a number of iterations of a loop by a brute-force evaluation.
2432 /* Bound on the number of iterations we try to evaluate. */
2434 #define MAX_ITERATIONS_TO_TRACK \
2435 ((unsigned) PARAM_VALUE (PARAM_MAX_ITERATIONS_TO_TRACK))
2437 /* Returns the loop phi node of LOOP such that ssa name X is derived from its
2438 result by a chain of operations such that all but exactly one of their
2439 operands are constants. */
2441 static gphi *
2442 chain_of_csts_start (struct loop *loop, tree x)
2444 gimple *stmt = SSA_NAME_DEF_STMT (x);
2445 tree use;
2446 basic_block bb = gimple_bb (stmt);
2447 enum tree_code code;
2449 if (!bb
2450 || !flow_bb_inside_loop_p (loop, bb))
2451 return NULL;
2453 if (gimple_code (stmt) == GIMPLE_PHI)
2455 if (bb == loop->header)
2456 return as_a <gphi *> (stmt);
2458 return NULL;
2461 if (gimple_code (stmt) != GIMPLE_ASSIGN
2462 || gimple_assign_rhs_class (stmt) == GIMPLE_TERNARY_RHS)
2463 return NULL;
2465 code = gimple_assign_rhs_code (stmt);
2466 if (gimple_references_memory_p (stmt)
2467 || TREE_CODE_CLASS (code) == tcc_reference
2468 || (code == ADDR_EXPR
2469 && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
2470 return NULL;
2472 use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2473 if (use == NULL_TREE)
2474 return NULL;
2476 return chain_of_csts_start (loop, use);
2479 /* Determines whether the expression X is derived from a result of a phi node
2480 in header of LOOP such that
2482 * the derivation of X consists only from operations with constants
2483 * the initial value of the phi node is constant
2484 * the value of the phi node in the next iteration can be derived from the
2485 value in the current iteration by a chain of operations with constants.
2487 If such phi node exists, it is returned, otherwise NULL is returned. */
2489 static gphi *
2490 get_base_for (struct loop *loop, tree x)
2492 gphi *phi;
2493 tree init, next;
2495 if (is_gimple_min_invariant (x))
2496 return NULL;
2498 phi = chain_of_csts_start (loop, x);
2499 if (!phi)
2500 return NULL;
2502 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2503 next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2505 if (TREE_CODE (next) != SSA_NAME)
2506 return NULL;
2508 if (!is_gimple_min_invariant (init))
2509 return NULL;
2511 if (chain_of_csts_start (loop, next) != phi)
2512 return NULL;
2514 return phi;
2517 /* Given an expression X, then
2519 * if X is NULL_TREE, we return the constant BASE.
2520 * otherwise X is a SSA name, whose value in the considered loop is derived
2521 by a chain of operations with constant from a result of a phi node in
2522 the header of the loop. Then we return value of X when the value of the
2523 result of this phi node is given by the constant BASE. */
2525 static tree
2526 get_val_for (tree x, tree base)
2528 gimple *stmt;
2530 gcc_checking_assert (is_gimple_min_invariant (base));
2532 if (!x)
2533 return base;
2535 stmt = SSA_NAME_DEF_STMT (x);
2536 if (gimple_code (stmt) == GIMPLE_PHI)
2537 return base;
2539 gcc_checking_assert (is_gimple_assign (stmt));
2541 /* STMT must be either an assignment of a single SSA name or an
2542 expression involving an SSA name and a constant. Try to fold that
2543 expression using the value for the SSA name. */
2544 if (gimple_assign_ssa_name_copy_p (stmt))
2545 return get_val_for (gimple_assign_rhs1 (stmt), base);
2546 else if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS
2547 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2549 return fold_build1 (gimple_assign_rhs_code (stmt),
2550 gimple_expr_type (stmt),
2551 get_val_for (gimple_assign_rhs1 (stmt), base));
2553 else if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
2555 tree rhs1 = gimple_assign_rhs1 (stmt);
2556 tree rhs2 = gimple_assign_rhs2 (stmt);
2557 if (TREE_CODE (rhs1) == SSA_NAME)
2558 rhs1 = get_val_for (rhs1, base);
2559 else if (TREE_CODE (rhs2) == SSA_NAME)
2560 rhs2 = get_val_for (rhs2, base);
2561 else
2562 gcc_unreachable ();
2563 return fold_build2 (gimple_assign_rhs_code (stmt),
2564 gimple_expr_type (stmt), rhs1, rhs2);
2566 else
2567 gcc_unreachable ();
2571 /* Tries to count the number of iterations of LOOP till it exits by EXIT
2572 by brute force -- i.e. by determining the value of the operands of the
2573 condition at EXIT in first few iterations of the loop (assuming that
2574 these values are constant) and determining the first one in that the
2575 condition is not satisfied. Returns the constant giving the number
2576 of the iterations of LOOP if successful, chrec_dont_know otherwise. */
2578 tree
2579 loop_niter_by_eval (struct loop *loop, edge exit)
2581 tree acnd;
2582 tree op[2], val[2], next[2], aval[2];
2583 gphi *phi;
2584 gimple *cond;
2585 unsigned i, j;
2586 enum tree_code cmp;
2588 cond = last_stmt (exit->src);
2589 if (!cond || gimple_code (cond) != GIMPLE_COND)
2590 return chrec_dont_know;
2592 cmp = gimple_cond_code (cond);
2593 if (exit->flags & EDGE_TRUE_VALUE)
2594 cmp = invert_tree_comparison (cmp, false);
2596 switch (cmp)
2598 case EQ_EXPR:
2599 case NE_EXPR:
2600 case GT_EXPR:
2601 case GE_EXPR:
2602 case LT_EXPR:
2603 case LE_EXPR:
2604 op[0] = gimple_cond_lhs (cond);
2605 op[1] = gimple_cond_rhs (cond);
2606 break;
2608 default:
2609 return chrec_dont_know;
2612 for (j = 0; j < 2; j++)
2614 if (is_gimple_min_invariant (op[j]))
2616 val[j] = op[j];
2617 next[j] = NULL_TREE;
2618 op[j] = NULL_TREE;
2620 else
2622 phi = get_base_for (loop, op[j]);
2623 if (!phi)
2624 return chrec_dont_know;
2625 val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2626 next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2630 /* Don't issue signed overflow warnings. */
2631 fold_defer_overflow_warnings ();
2633 for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
2635 for (j = 0; j < 2; j++)
2636 aval[j] = get_val_for (op[j], val[j]);
2638 acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
2639 if (acnd && integer_zerop (acnd))
2641 fold_undefer_and_ignore_overflow_warnings ();
2642 if (dump_file && (dump_flags & TDF_DETAILS))
2643 fprintf (dump_file,
2644 "Proved that loop %d iterates %d times using brute force.\n",
2645 loop->num, i);
2646 return build_int_cst (unsigned_type_node, i);
2649 for (j = 0; j < 2; j++)
2651 val[j] = get_val_for (next[j], val[j]);
2652 if (!is_gimple_min_invariant (val[j]))
2654 fold_undefer_and_ignore_overflow_warnings ();
2655 return chrec_dont_know;
2660 fold_undefer_and_ignore_overflow_warnings ();
2662 return chrec_dont_know;
2665 /* Finds the exit of the LOOP by that the loop exits after a constant
2666 number of iterations and stores the exit edge to *EXIT. The constant
2667 giving the number of iterations of LOOP is returned. The number of
2668 iterations is determined using loop_niter_by_eval (i.e. by brute force
2669 evaluation). If we are unable to find the exit for that loop_niter_by_eval
2670 determines the number of iterations, chrec_dont_know is returned. */
2672 tree
2673 find_loop_niter_by_eval (struct loop *loop, edge *exit)
2675 unsigned i;
2676 vec<edge> exits = get_loop_exit_edges (loop);
2677 edge ex;
2678 tree niter = NULL_TREE, aniter;
2680 *exit = NULL;
2682 /* Loops with multiple exits are expensive to handle and less important. */
2683 if (!flag_expensive_optimizations
2684 && exits.length () > 1)
2686 exits.release ();
2687 return chrec_dont_know;
2690 FOR_EACH_VEC_ELT (exits, i, ex)
2692 if (!just_once_each_iteration_p (loop, ex->src))
2693 continue;
2695 aniter = loop_niter_by_eval (loop, ex);
2696 if (chrec_contains_undetermined (aniter))
2697 continue;
2699 if (niter
2700 && !tree_int_cst_lt (aniter, niter))
2701 continue;
2703 niter = aniter;
2704 *exit = ex;
2706 exits.release ();
2708 return niter ? niter : chrec_dont_know;
2713 Analysis of upper bounds on number of iterations of a loop.
2717 static widest_int derive_constant_upper_bound_ops (tree, tree,
2718 enum tree_code, tree);
2720 /* Returns a constant upper bound on the value of the right-hand side of
2721 an assignment statement STMT. */
2723 static widest_int
2724 derive_constant_upper_bound_assign (gimple *stmt)
2726 enum tree_code code = gimple_assign_rhs_code (stmt);
2727 tree op0 = gimple_assign_rhs1 (stmt);
2728 tree op1 = gimple_assign_rhs2 (stmt);
2730 return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)),
2731 op0, code, op1);
2734 /* Returns a constant upper bound on the value of expression VAL. VAL
2735 is considered to be unsigned. If its type is signed, its value must
2736 be nonnegative. */
2738 static widest_int
2739 derive_constant_upper_bound (tree val)
2741 enum tree_code code;
2742 tree op0, op1;
2744 extract_ops_from_tree (val, &code, &op0, &op1);
2745 return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
2748 /* Returns a constant upper bound on the value of expression OP0 CODE OP1,
2749 whose type is TYPE. The expression is considered to be unsigned. If
2750 its type is signed, its value must be nonnegative. */
2752 static widest_int
2753 derive_constant_upper_bound_ops (tree type, tree op0,
2754 enum tree_code code, tree op1)
2756 tree subtype, maxt;
2757 widest_int bnd, max, mmax, cst;
2758 gimple *stmt;
2760 if (INTEGRAL_TYPE_P (type))
2761 maxt = TYPE_MAX_VALUE (type);
2762 else
2763 maxt = upper_bound_in_type (type, type);
2765 max = wi::to_widest (maxt);
2767 switch (code)
2769 case INTEGER_CST:
2770 return wi::to_widest (op0);
2772 CASE_CONVERT:
2773 subtype = TREE_TYPE (op0);
2774 if (!TYPE_UNSIGNED (subtype)
2775 /* If TYPE is also signed, the fact that VAL is nonnegative implies
2776 that OP0 is nonnegative. */
2777 && TYPE_UNSIGNED (type)
2778 && !tree_expr_nonnegative_p (op0))
2780 /* If we cannot prove that the casted expression is nonnegative,
2781 we cannot establish more useful upper bound than the precision
2782 of the type gives us. */
2783 return max;
2786 /* We now know that op0 is an nonnegative value. Try deriving an upper
2787 bound for it. */
2788 bnd = derive_constant_upper_bound (op0);
2790 /* If the bound does not fit in TYPE, max. value of TYPE could be
2791 attained. */
2792 if (wi::ltu_p (max, bnd))
2793 return max;
2795 return bnd;
2797 case PLUS_EXPR:
2798 case POINTER_PLUS_EXPR:
2799 case MINUS_EXPR:
2800 if (TREE_CODE (op1) != INTEGER_CST
2801 || !tree_expr_nonnegative_p (op0))
2802 return max;
2804 /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to
2805 choose the most logical way how to treat this constant regardless
2806 of the signedness of the type. */
2807 cst = wi::sext (wi::to_widest (op1), TYPE_PRECISION (type));
2808 if (code != MINUS_EXPR)
2809 cst = -cst;
2811 bnd = derive_constant_upper_bound (op0);
2813 if (wi::neg_p (cst))
2815 cst = -cst;
2816 /* Avoid CST == 0x80000... */
2817 if (wi::neg_p (cst))
2818 return max;
2820 /* OP0 + CST. We need to check that
2821 BND <= MAX (type) - CST. */
2823 mmax -= cst;
2824 if (wi::ltu_p (bnd, max))
2825 return max;
2827 return bnd + cst;
2829 else
2831 /* OP0 - CST, where CST >= 0.
2833 If TYPE is signed, we have already verified that OP0 >= 0, and we
2834 know that the result is nonnegative. This implies that
2835 VAL <= BND - CST.
2837 If TYPE is unsigned, we must additionally know that OP0 >= CST,
2838 otherwise the operation underflows.
2841 /* This should only happen if the type is unsigned; however, for
2842 buggy programs that use overflowing signed arithmetics even with
2843 -fno-wrapv, this condition may also be true for signed values. */
2844 if (wi::ltu_p (bnd, cst))
2845 return max;
2847 if (TYPE_UNSIGNED (type))
2849 tree tem = fold_binary (GE_EXPR, boolean_type_node, op0,
2850 wide_int_to_tree (type, cst));
2851 if (!tem || integer_nonzerop (tem))
2852 return max;
2855 bnd -= cst;
2858 return bnd;
2860 case FLOOR_DIV_EXPR:
2861 case EXACT_DIV_EXPR:
2862 if (TREE_CODE (op1) != INTEGER_CST
2863 || tree_int_cst_sign_bit (op1))
2864 return max;
2866 bnd = derive_constant_upper_bound (op0);
2867 return wi::udiv_floor (bnd, wi::to_widest (op1));
2869 case BIT_AND_EXPR:
2870 if (TREE_CODE (op1) != INTEGER_CST
2871 || tree_int_cst_sign_bit (op1))
2872 return max;
2873 return wi::to_widest (op1);
2875 case SSA_NAME:
2876 stmt = SSA_NAME_DEF_STMT (op0);
2877 if (gimple_code (stmt) != GIMPLE_ASSIGN
2878 || gimple_assign_lhs (stmt) != op0)
2879 return max;
2880 return derive_constant_upper_bound_assign (stmt);
2882 default:
2883 return max;
2887 /* Emit a -Waggressive-loop-optimizations warning if needed. */
2889 static void
2890 do_warn_aggressive_loop_optimizations (struct loop *loop,
2891 widest_int i_bound, gimple *stmt)
2893 /* Don't warn if the loop doesn't have known constant bound. */
2894 if (!loop->nb_iterations
2895 || TREE_CODE (loop->nb_iterations) != INTEGER_CST
2896 || !warn_aggressive_loop_optimizations
2897 /* To avoid warning multiple times for the same loop,
2898 only start warning when we preserve loops. */
2899 || (cfun->curr_properties & PROP_loops) == 0
2900 /* Only warn once per loop. */
2901 || loop->warned_aggressive_loop_optimizations
2902 /* Only warn if undefined behavior gives us lower estimate than the
2903 known constant bound. */
2904 || wi::cmpu (i_bound, wi::to_widest (loop->nb_iterations)) >= 0
2905 /* And undefined behavior happens unconditionally. */
2906 || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (stmt)))
2907 return;
2909 edge e = single_exit (loop);
2910 if (e == NULL)
2911 return;
2913 gimple *estmt = last_stmt (e->src);
2914 char buf[WIDE_INT_PRINT_BUFFER_SIZE];
2915 print_dec (i_bound, buf, TYPE_UNSIGNED (TREE_TYPE (loop->nb_iterations))
2916 ? UNSIGNED : SIGNED);
2917 if (warning_at (gimple_location (stmt), OPT_Waggressive_loop_optimizations,
2918 "iteration %s invokes undefined behavior", buf))
2919 inform (gimple_location (estmt), "within this loop");
2920 loop->warned_aggressive_loop_optimizations = true;
2923 /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT
2924 is true if the loop is exited immediately after STMT, and this exit
2925 is taken at last when the STMT is executed BOUND + 1 times.
2926 REALISTIC is true if BOUND is expected to be close to the real number
2927 of iterations. UPPER is true if we are sure the loop iterates at most
2928 BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */
2930 static void
2931 record_estimate (struct loop *loop, tree bound, const widest_int &i_bound,
2932 gimple *at_stmt, bool is_exit, bool realistic, bool upper)
2934 widest_int delta;
2936 if (dump_file && (dump_flags & TDF_DETAILS))
2938 fprintf (dump_file, "Statement %s", is_exit ? "(exit)" : "");
2939 print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
2940 fprintf (dump_file, " is %sexecuted at most ",
2941 upper ? "" : "probably ");
2942 print_generic_expr (dump_file, bound, TDF_SLIM);
2943 fprintf (dump_file, " (bounded by ");
2944 print_decu (i_bound, dump_file);
2945 fprintf (dump_file, ") + 1 times in loop %d.\n", loop->num);
2948 /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
2949 real number of iterations. */
2950 if (TREE_CODE (bound) != INTEGER_CST)
2951 realistic = false;
2952 else
2953 gcc_checking_assert (i_bound == wi::to_widest (bound));
2954 if (!upper && !realistic)
2955 return;
2957 /* If we have a guaranteed upper bound, record it in the appropriate
2958 list, unless this is an !is_exit bound (i.e. undefined behavior in
2959 at_stmt) in a loop with known constant number of iterations. */
2960 if (upper
2961 && (is_exit
2962 || loop->nb_iterations == NULL_TREE
2963 || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
2965 struct nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();
2967 elt->bound = i_bound;
2968 elt->stmt = at_stmt;
2969 elt->is_exit = is_exit;
2970 elt->next = loop->bounds;
2971 loop->bounds = elt;
2974 /* If statement is executed on every path to the loop latch, we can directly
2975 infer the upper bound on the # of iterations of the loop. */
2976 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (at_stmt)))
2977 return;
2979 /* Update the number of iteration estimates according to the bound.
2980 If at_stmt is an exit then the loop latch is executed at most BOUND times,
2981 otherwise it can be executed BOUND + 1 times. We will lower the estimate
2982 later if such statement must be executed on last iteration */
2983 if (is_exit)
2984 delta = 0;
2985 else
2986 delta = 1;
2987 widest_int new_i_bound = i_bound + delta;
2989 /* If an overflow occurred, ignore the result. */
2990 if (wi::ltu_p (new_i_bound, delta))
2991 return;
2993 if (upper && !is_exit)
2994 do_warn_aggressive_loop_optimizations (loop, new_i_bound, at_stmt);
2995 record_niter_bound (loop, new_i_bound, realistic, upper);
2998 /* Records the control iv analyzed in NITER for LOOP if the iv is valid
2999 and doesn't overflow. */
3001 static void
3002 record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
3004 struct control_iv *iv;
3006 if (!niter->control.base || !niter->control.step)
3007 return;
3009 if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
3010 return;
3012 iv = ggc_alloc<control_iv> ();
3013 iv->base = niter->control.base;
3014 iv->step = niter->control.step;
3015 iv->next = loop->control_ivs;
3016 loop->control_ivs = iv;
3018 return;
3021 /* Record the estimate on number of iterations of LOOP based on the fact that
3022 the induction variable BASE + STEP * i evaluated in STMT does not wrap and
3023 its values belong to the range <LOW, HIGH>. REALISTIC is true if the
3024 estimated number of iterations is expected to be close to the real one.
3025 UPPER is true if we are sure the induction variable does not wrap. */
3027 static void
3028 record_nonwrapping_iv (struct loop *loop, tree base, tree step, gimple *stmt,
3029 tree low, tree high, bool realistic, bool upper)
3031 tree niter_bound, extreme, delta;
3032 tree type = TREE_TYPE (base), unsigned_type;
3033 tree orig_base = base;
3035 if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3036 return;
3038 if (dump_file && (dump_flags & TDF_DETAILS))
3040 fprintf (dump_file, "Induction variable (");
3041 print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
3042 fprintf (dump_file, ") ");
3043 print_generic_expr (dump_file, base, TDF_SLIM);
3044 fprintf (dump_file, " + ");
3045 print_generic_expr (dump_file, step, TDF_SLIM);
3046 fprintf (dump_file, " * iteration does not wrap in statement ");
3047 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3048 fprintf (dump_file, " in loop %d.\n", loop->num);
3051 unsigned_type = unsigned_type_for (type);
3052 base = fold_convert (unsigned_type, base);
3053 step = fold_convert (unsigned_type, step);
3055 if (tree_int_cst_sign_bit (step))
3057 wide_int min, max;
3058 extreme = fold_convert (unsigned_type, low);
3059 if (TREE_CODE (orig_base) == SSA_NAME
3060 && TREE_CODE (high) == INTEGER_CST
3061 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
3062 && get_range_info (orig_base, &min, &max) == VR_RANGE
3063 && wi::gts_p (high, max))
3064 base = wide_int_to_tree (unsigned_type, max);
3065 else if (TREE_CODE (base) != INTEGER_CST)
3066 base = fold_convert (unsigned_type, high);
3067 delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
3068 step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
3070 else
3072 wide_int min, max;
3073 extreme = fold_convert (unsigned_type, high);
3074 if (TREE_CODE (orig_base) == SSA_NAME
3075 && TREE_CODE (low) == INTEGER_CST
3076 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
3077 && get_range_info (orig_base, &min, &max) == VR_RANGE
3078 && wi::gts_p (min, low))
3079 base = wide_int_to_tree (unsigned_type, min);
3080 else if (TREE_CODE (base) != INTEGER_CST)
3081 base = fold_convert (unsigned_type, low);
3082 delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
3085 /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
3086 would get out of the range. */
3087 niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
3088 widest_int max = derive_constant_upper_bound (niter_bound);
3089 record_estimate (loop, niter_bound, max, stmt, false, realistic, upper);
3092 /* Determine information about number of iterations a LOOP from the index
3093 IDX of a data reference accessed in STMT. RELIABLE is true if STMT is
3094 guaranteed to be executed in every iteration of LOOP. Callback for
3095 for_each_index. */
3097 struct ilb_data
3099 struct loop *loop;
3100 gimple *stmt;
3103 static bool
3104 idx_infer_loop_bounds (tree base, tree *idx, void *dta)
3106 struct ilb_data *data = (struct ilb_data *) dta;
3107 tree ev, init, step;
3108 tree low, high, type, next;
3109 bool sign, upper = true, at_end = false;
3110 struct loop *loop = data->loop;
3111 bool reliable = true;
3113 if (TREE_CODE (base) != ARRAY_REF)
3114 return true;
3116 /* For arrays at the end of the structure, we are not guaranteed that they
3117 do not really extend over their declared size. However, for arrays of
3118 size greater than one, this is unlikely to be intended. */
3119 if (array_at_struct_end_p (base))
3121 at_end = true;
3122 upper = false;
3125 struct loop *dloop = loop_containing_stmt (data->stmt);
3126 if (!dloop)
3127 return true;
3129 ev = analyze_scalar_evolution (dloop, *idx);
3130 ev = instantiate_parameters (loop, ev);
3131 init = initial_condition (ev);
3132 step = evolution_part_in_loop_num (ev, loop->num);
3134 if (!init
3135 || !step
3136 || TREE_CODE (step) != INTEGER_CST
3137 || integer_zerop (step)
3138 || tree_contains_chrecs (init, NULL)
3139 || chrec_contains_symbols_defined_in_loop (init, loop->num))
3140 return true;
3142 low = array_ref_low_bound (base);
3143 high = array_ref_up_bound (base);
3145 /* The case of nonconstant bounds could be handled, but it would be
3146 complicated. */
3147 if (TREE_CODE (low) != INTEGER_CST
3148 || !high
3149 || TREE_CODE (high) != INTEGER_CST)
3150 return true;
3151 sign = tree_int_cst_sign_bit (step);
3152 type = TREE_TYPE (step);
3154 /* The array of length 1 at the end of a structure most likely extends
3155 beyond its bounds. */
3156 if (at_end
3157 && operand_equal_p (low, high, 0))
3158 return true;
3160 /* In case the relevant bound of the array does not fit in type, or
3161 it does, but bound + step (in type) still belongs into the range of the
3162 array, the index may wrap and still stay within the range of the array
3163 (consider e.g. if the array is indexed by the full range of
3164 unsigned char).
3166 To make things simpler, we require both bounds to fit into type, although
3167 there are cases where this would not be strictly necessary. */
3168 if (!int_fits_type_p (high, type)
3169 || !int_fits_type_p (low, type))
3170 return true;
3171 low = fold_convert (type, low);
3172 high = fold_convert (type, high);
3174 if (sign)
3175 next = fold_binary (PLUS_EXPR, type, low, step);
3176 else
3177 next = fold_binary (PLUS_EXPR, type, high, step);
3179 if (tree_int_cst_compare (low, next) <= 0
3180 && tree_int_cst_compare (next, high) <= 0)
3181 return true;
3183 /* If access is not executed on every iteration, we must ensure that overlow may
3184 not make the access valid later. */
3185 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt))
3186 && scev_probably_wraps_p (initial_condition_in_loop_num (ev, loop->num),
3187 step, data->stmt, loop, true))
3188 reliable = false;
3190 record_nonwrapping_iv (loop, init, step, data->stmt, low, high, reliable, upper);
3191 return true;
3194 /* Determine information about number of iterations a LOOP from the bounds
3195 of arrays in the data reference REF accessed in STMT. RELIABLE is true if
3196 STMT is guaranteed to be executed in every iteration of LOOP.*/
3198 static void
3199 infer_loop_bounds_from_ref (struct loop *loop, gimple *stmt, tree ref)
3201 struct ilb_data data;
3203 data.loop = loop;
3204 data.stmt = stmt;
3205 for_each_index (&ref, idx_infer_loop_bounds, &data);
3208 /* Determine information about number of iterations of a LOOP from the way
3209 arrays are used in STMT. RELIABLE is true if STMT is guaranteed to be
3210 executed in every iteration of LOOP. */
3212 static void
3213 infer_loop_bounds_from_array (struct loop *loop, gimple *stmt)
3215 if (is_gimple_assign (stmt))
3217 tree op0 = gimple_assign_lhs (stmt);
3218 tree op1 = gimple_assign_rhs1 (stmt);
3220 /* For each memory access, analyze its access function
3221 and record a bound on the loop iteration domain. */
3222 if (REFERENCE_CLASS_P (op0))
3223 infer_loop_bounds_from_ref (loop, stmt, op0);
3225 if (REFERENCE_CLASS_P (op1))
3226 infer_loop_bounds_from_ref (loop, stmt, op1);
3228 else if (is_gimple_call (stmt))
3230 tree arg, lhs;
3231 unsigned i, n = gimple_call_num_args (stmt);
3233 lhs = gimple_call_lhs (stmt);
3234 if (lhs && REFERENCE_CLASS_P (lhs))
3235 infer_loop_bounds_from_ref (loop, stmt, lhs);
3237 for (i = 0; i < n; i++)
3239 arg = gimple_call_arg (stmt, i);
3240 if (REFERENCE_CLASS_P (arg))
3241 infer_loop_bounds_from_ref (loop, stmt, arg);
3246 /* Determine information about number of iterations of a LOOP from the fact
3247 that pointer arithmetics in STMT does not overflow. */
3249 static void
3250 infer_loop_bounds_from_pointer_arith (struct loop *loop, gimple *stmt)
3252 tree def, base, step, scev, type, low, high;
3253 tree var, ptr;
3255 if (!is_gimple_assign (stmt)
3256 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
3257 return;
3259 def = gimple_assign_lhs (stmt);
3260 if (TREE_CODE (def) != SSA_NAME)
3261 return;
3263 type = TREE_TYPE (def);
3264 if (!nowrap_type_p (type))
3265 return;
3267 ptr = gimple_assign_rhs1 (stmt);
3268 if (!expr_invariant_in_loop_p (loop, ptr))
3269 return;
3271 var = gimple_assign_rhs2 (stmt);
3272 if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
3273 return;
3275 scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
3276 if (chrec_contains_undetermined (scev))
3277 return;
3279 base = initial_condition_in_loop_num (scev, loop->num);
3280 step = evolution_part_in_loop_num (scev, loop->num);
3282 if (!base || !step
3283 || TREE_CODE (step) != INTEGER_CST
3284 || tree_contains_chrecs (base, NULL)
3285 || chrec_contains_symbols_defined_in_loop (base, loop->num))
3286 return;
3288 low = lower_bound_in_type (type, type);
3289 high = upper_bound_in_type (type, type);
3291 /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
3292 produce a NULL pointer. The contrary would mean NULL points to an object,
3293 while NULL is supposed to compare unequal with the address of all objects.
3294 Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
3295 NULL pointer since that would mean wrapping, which we assume here not to
3296 happen. So, we can exclude NULL from the valid range of pointer
3297 arithmetic. */
3298 if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
3299 low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));
3301 record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
3304 /* Determine information about number of iterations of a LOOP from the fact
3305 that signed arithmetics in STMT does not overflow. */
3307 static void
3308 infer_loop_bounds_from_signedness (struct loop *loop, gimple *stmt)
3310 tree def, base, step, scev, type, low, high;
3312 if (gimple_code (stmt) != GIMPLE_ASSIGN)
3313 return;
3315 def = gimple_assign_lhs (stmt);
3317 if (TREE_CODE (def) != SSA_NAME)
3318 return;
3320 type = TREE_TYPE (def);
3321 if (!INTEGRAL_TYPE_P (type)
3322 || !TYPE_OVERFLOW_UNDEFINED (type))
3323 return;
3325 scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
3326 if (chrec_contains_undetermined (scev))
3327 return;
3329 base = initial_condition_in_loop_num (scev, loop->num);
3330 step = evolution_part_in_loop_num (scev, loop->num);
3332 if (!base || !step
3333 || TREE_CODE (step) != INTEGER_CST
3334 || tree_contains_chrecs (base, NULL)
3335 || chrec_contains_symbols_defined_in_loop (base, loop->num))
3336 return;
3338 low = lower_bound_in_type (type, type);
3339 high = upper_bound_in_type (type, type);
3341 record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
3344 /* The following analyzers are extracting informations on the bounds
3345 of LOOP from the following undefined behaviors:
3347 - data references should not access elements over the statically
3348 allocated size,
3350 - signed variables should not overflow when flag_wrapv is not set.
3353 static void
3354 infer_loop_bounds_from_undefined (struct loop *loop)
3356 unsigned i;
3357 basic_block *bbs;
3358 gimple_stmt_iterator bsi;
3359 basic_block bb;
3360 bool reliable;
3362 bbs = get_loop_body (loop);
3364 for (i = 0; i < loop->num_nodes; i++)
3366 bb = bbs[i];
3368 /* If BB is not executed in each iteration of the loop, we cannot
3369 use the operations in it to infer reliable upper bound on the
3370 # of iterations of the loop. However, we can use it as a guess.
3371 Reliable guesses come only from array bounds. */
3372 reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb);
3374 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
3376 gimple *stmt = gsi_stmt (bsi);
3378 infer_loop_bounds_from_array (loop, stmt);
3380 if (reliable)
3382 infer_loop_bounds_from_signedness (loop, stmt);
3383 infer_loop_bounds_from_pointer_arith (loop, stmt);
3389 free (bbs);
3392 /* Compare wide ints, callback for qsort. */
3394 static int
3395 wide_int_cmp (const void *p1, const void *p2)
3397 const widest_int *d1 = (const widest_int *) p1;
3398 const widest_int *d2 = (const widest_int *) p2;
3399 return wi::cmpu (*d1, *d2);
3402 /* Return index of BOUND in BOUNDS array sorted in increasing order.
3403 Lookup by binary search. */
3405 static int
3406 bound_index (vec<widest_int> bounds, const widest_int &bound)
3408 unsigned int end = bounds.length ();
3409 unsigned int begin = 0;
3411 /* Find a matching index by means of a binary search. */
3412 while (begin != end)
3414 unsigned int middle = (begin + end) / 2;
3415 widest_int index = bounds[middle];
3417 if (index == bound)
3418 return middle;
3419 else if (wi::ltu_p (index, bound))
3420 begin = middle + 1;
3421 else
3422 end = middle;
3424 gcc_unreachable ();
3427 /* We recorded loop bounds only for statements dominating loop latch (and thus
3428 executed each loop iteration). If there are any bounds on statements not
3429 dominating the loop latch we can improve the estimate by walking the loop
3430 body and seeing if every path from loop header to loop latch contains
3431 some bounded statement. */
3433 static void
3434 discover_iteration_bound_by_body_walk (struct loop *loop)
3436 struct nb_iter_bound *elt;
3437 vec<widest_int> bounds = vNULL;
3438 vec<vec<basic_block> > queues = vNULL;
3439 vec<basic_block> queue = vNULL;
3440 ptrdiff_t queue_index;
3441 ptrdiff_t latch_index = 0;
3443 /* Discover what bounds may interest us. */
3444 for (elt = loop->bounds; elt; elt = elt->next)
3446 widest_int bound = elt->bound;
3448 /* Exit terminates loop at given iteration, while non-exits produce undefined
3449 effect on the next iteration. */
3450 if (!elt->is_exit)
3452 bound += 1;
3453 /* If an overflow occurred, ignore the result. */
3454 if (bound == 0)
3455 continue;
3458 if (!loop->any_upper_bound
3459 || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
3460 bounds.safe_push (bound);
3463 /* Exit early if there is nothing to do. */
3464 if (!bounds.exists ())
3465 return;
3467 if (dump_file && (dump_flags & TDF_DETAILS))
3468 fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
3470 /* Sort the bounds in decreasing order. */
3471 bounds.qsort (wide_int_cmp);
3473 /* For every basic block record the lowest bound that is guaranteed to
3474 terminate the loop. */
3476 hash_map<basic_block, ptrdiff_t> bb_bounds;
3477 for (elt = loop->bounds; elt; elt = elt->next)
3479 widest_int bound = elt->bound;
3480 if (!elt->is_exit)
3482 bound += 1;
3483 /* If an overflow occurred, ignore the result. */
3484 if (bound == 0)
3485 continue;
3488 if (!loop->any_upper_bound
3489 || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
3491 ptrdiff_t index = bound_index (bounds, bound);
3492 ptrdiff_t *entry = bb_bounds.get (gimple_bb (elt->stmt));
3493 if (!entry)
3494 bb_bounds.put (gimple_bb (elt->stmt), index);
3495 else if ((ptrdiff_t)*entry > index)
3496 *entry = index;
3500 hash_map<basic_block, ptrdiff_t> block_priority;
3502 /* Perform shortest path discovery loop->header ... loop->latch.
3504 The "distance" is given by the smallest loop bound of basic block
3505 present in the path and we look for path with largest smallest bound
3506 on it.
3508 To avoid the need for fibonacci heap on double ints we simply compress
3509 double ints into indexes to BOUNDS array and then represent the queue
3510 as arrays of queues for every index.
3511 Index of BOUNDS.length() means that the execution of given BB has
3512 no bounds determined.
3514 VISITED is a pointer map translating basic block into smallest index
3515 it was inserted into the priority queue with. */
3516 latch_index = -1;
3518 /* Start walk in loop header with index set to infinite bound. */
3519 queue_index = bounds.length ();
3520 queues.safe_grow_cleared (queue_index + 1);
3521 queue.safe_push (loop->header);
3522 queues[queue_index] = queue;
3523 block_priority.put (loop->header, queue_index);
3525 for (; queue_index >= 0; queue_index--)
3527 if (latch_index < queue_index)
3529 while (queues[queue_index].length ())
3531 basic_block bb;
3532 ptrdiff_t bound_index = queue_index;
3533 edge e;
3534 edge_iterator ei;
3536 queue = queues[queue_index];
3537 bb = queue.pop ();
3539 /* OK, we later inserted the BB with lower priority, skip it. */
3540 if (*block_priority.get (bb) > queue_index)
3541 continue;
3543 /* See if we can improve the bound. */
3544 ptrdiff_t *entry = bb_bounds.get (bb);
3545 if (entry && *entry < bound_index)
3546 bound_index = *entry;
3548 /* Insert succesors into the queue, watch for latch edge
3549 and record greatest index we saw. */
3550 FOR_EACH_EDGE (e, ei, bb->succs)
3552 bool insert = false;
3554 if (loop_exit_edge_p (loop, e))
3555 continue;
3557 if (e == loop_latch_edge (loop)
3558 && latch_index < bound_index)
3559 latch_index = bound_index;
3560 else if (!(entry = block_priority.get (e->dest)))
3562 insert = true;
3563 block_priority.put (e->dest, bound_index);
3565 else if (*entry < bound_index)
3567 insert = true;
3568 *entry = bound_index;
3571 if (insert)
3572 queues[bound_index].safe_push (e->dest);
3576 queues[queue_index].release ();
3579 gcc_assert (latch_index >= 0);
3580 if ((unsigned)latch_index < bounds.length ())
3582 if (dump_file && (dump_flags & TDF_DETAILS))
3584 fprintf (dump_file, "Found better loop bound ");
3585 print_decu (bounds[latch_index], dump_file);
3586 fprintf (dump_file, "\n");
3588 record_niter_bound (loop, bounds[latch_index], false, true);
3591 queues.release ();
3592 bounds.release ();
3595 /* See if every path cross the loop goes through a statement that is known
3596 to not execute at the last iteration. In that case we can decrese iteration
3597 count by 1. */
3599 static void
3600 maybe_lower_iteration_bound (struct loop *loop)
3602 hash_set<gimple *> *not_executed_last_iteration = NULL;
3603 struct nb_iter_bound *elt;
3604 bool found_exit = false;
3605 vec<basic_block> queue = vNULL;
3606 bitmap visited;
3608 /* Collect all statements with interesting (i.e. lower than
3609 nb_iterations_upper_bound) bound on them.
3611 TODO: Due to the way record_estimate choose estimates to store, the bounds
3612 will be always nb_iterations_upper_bound-1. We can change this to record
3613 also statements not dominating the loop latch and update the walk bellow
3614 to the shortest path algorthm. */
3615 for (elt = loop->bounds; elt; elt = elt->next)
3617 if (!elt->is_exit
3618 && wi::ltu_p (elt->bound, loop->nb_iterations_upper_bound))
3620 if (!not_executed_last_iteration)
3621 not_executed_last_iteration = new hash_set<gimple *>;
3622 not_executed_last_iteration->add (elt->stmt);
3625 if (!not_executed_last_iteration)
3626 return;
3628 /* Start DFS walk in the loop header and see if we can reach the
3629 loop latch or any of the exits (including statements with side
3630 effects that may terminate the loop otherwise) without visiting
3631 any of the statements known to have undefined effect on the last
3632 iteration. */
3633 queue.safe_push (loop->header);
3634 visited = BITMAP_ALLOC (NULL);
3635 bitmap_set_bit (visited, loop->header->index);
3636 found_exit = false;
3640 basic_block bb = queue.pop ();
3641 gimple_stmt_iterator gsi;
3642 bool stmt_found = false;
3644 /* Loop for possible exits and statements bounding the execution. */
3645 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3647 gimple *stmt = gsi_stmt (gsi);
3648 if (not_executed_last_iteration->contains (stmt))
3650 stmt_found = true;
3651 break;
3653 if (gimple_has_side_effects (stmt))
3655 found_exit = true;
3656 break;
3659 if (found_exit)
3660 break;
3662 /* If no bounding statement is found, continue the walk. */
3663 if (!stmt_found)
3665 edge e;
3666 edge_iterator ei;
3668 FOR_EACH_EDGE (e, ei, bb->succs)
3670 if (loop_exit_edge_p (loop, e)
3671 || e == loop_latch_edge (loop))
3673 found_exit = true;
3674 break;
3676 if (bitmap_set_bit (visited, e->dest->index))
3677 queue.safe_push (e->dest);
3681 while (queue.length () && !found_exit);
3683 /* If every path through the loop reach bounding statement before exit,
3684 then we know the last iteration of the loop will have undefined effect
3685 and we can decrease number of iterations. */
3687 if (!found_exit)
3689 if (dump_file && (dump_flags & TDF_DETAILS))
3690 fprintf (dump_file, "Reducing loop iteration estimate by 1; "
3691 "undefined statement must be executed at the last iteration.\n");
3692 record_niter_bound (loop, loop->nb_iterations_upper_bound - 1,
3693 false, true);
3696 BITMAP_FREE (visited);
3697 queue.release ();
3698 delete not_executed_last_iteration;
3701 /* Records estimates on numbers of iterations of LOOP. If USE_UNDEFINED_P
3702 is true also use estimates derived from undefined behavior. */
3704 static void
3705 estimate_numbers_of_iterations_loop (struct loop *loop)
3707 vec<edge> exits;
3708 tree niter, type;
3709 unsigned i;
3710 struct tree_niter_desc niter_desc;
3711 edge ex;
3712 widest_int bound;
3713 edge likely_exit;
3715 /* Give up if we already have tried to compute an estimation. */
3716 if (loop->estimate_state != EST_NOT_COMPUTED)
3717 return;
3719 loop->estimate_state = EST_AVAILABLE;
3720 /* Force estimate compuation but leave any existing upper bound in place. */
3721 loop->any_estimate = false;
3723 /* Ensure that loop->nb_iterations is computed if possible. If it turns out
3724 to be constant, we avoid undefined behavior implied bounds and instead
3725 diagnose those loops with -Waggressive-loop-optimizations. */
3726 number_of_latch_executions (loop);
3728 exits = get_loop_exit_edges (loop);
3729 likely_exit = single_likely_exit (loop);
3730 FOR_EACH_VEC_ELT (exits, i, ex)
3732 if (!number_of_iterations_exit (loop, ex, &niter_desc, false, false))
3733 continue;
3735 niter = niter_desc.niter;
3736 type = TREE_TYPE (niter);
3737 if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
3738 niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
3739 build_int_cst (type, 0),
3740 niter);
3741 record_estimate (loop, niter, niter_desc.max,
3742 last_stmt (ex->src),
3743 true, ex == likely_exit, true);
3744 record_control_iv (loop, &niter_desc);
3746 exits.release ();
3748 if (flag_aggressive_loop_optimizations)
3749 infer_loop_bounds_from_undefined (loop);
3751 discover_iteration_bound_by_body_walk (loop);
3753 maybe_lower_iteration_bound (loop);
3755 /* If we have a measured profile, use it to estimate the number of
3756 iterations. */
3757 if (loop->header->count != 0)
3759 gcov_type nit = expected_loop_iterations_unbounded (loop) + 1;
3760 bound = gcov_type_to_wide_int (nit);
3761 record_niter_bound (loop, bound, true, false);
3764 /* If we know the exact number of iterations of this loop, try to
3765 not break code with undefined behavior by not recording smaller
3766 maximum number of iterations. */
3767 if (loop->nb_iterations
3768 && TREE_CODE (loop->nb_iterations) == INTEGER_CST)
3770 loop->any_upper_bound = true;
3771 loop->nb_iterations_upper_bound = wi::to_widest (loop->nb_iterations);
3775 /* Sets NIT to the estimated number of executions of the latch of the
3776 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
3777 large as the number of iterations. If we have no reliable estimate,
3778 the function returns false, otherwise returns true. */
3780 bool
3781 estimated_loop_iterations (struct loop *loop, widest_int *nit)
3783 /* When SCEV information is available, try to update loop iterations
3784 estimate. Otherwise just return whatever we recorded earlier. */
3785 if (scev_initialized_p ())
3786 estimate_numbers_of_iterations_loop (loop);
3788 return (get_estimated_loop_iterations (loop, nit));
3791 /* Similar to estimated_loop_iterations, but returns the estimate only
3792 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3793 on the number of iterations of LOOP could not be derived, returns -1. */
3795 HOST_WIDE_INT
3796 estimated_loop_iterations_int (struct loop *loop)
3798 widest_int nit;
3799 HOST_WIDE_INT hwi_nit;
3801 if (!estimated_loop_iterations (loop, &nit))
3802 return -1;
3804 if (!wi::fits_shwi_p (nit))
3805 return -1;
3806 hwi_nit = nit.to_shwi ();
3808 return hwi_nit < 0 ? -1 : hwi_nit;
3812 /* Sets NIT to an upper bound for the maximum number of executions of the
3813 latch of the LOOP. If we have no reliable estimate, the function returns
3814 false, otherwise returns true. */
3816 bool
3817 max_loop_iterations (struct loop *loop, widest_int *nit)
3819 /* When SCEV information is available, try to update loop iterations
3820 estimate. Otherwise just return whatever we recorded earlier. */
3821 if (scev_initialized_p ())
3822 estimate_numbers_of_iterations_loop (loop);
3824 return get_max_loop_iterations (loop, nit);
3827 /* Similar to max_loop_iterations, but returns the estimate only
3828 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3829 on the number of iterations of LOOP could not be derived, returns -1. */
3831 HOST_WIDE_INT
3832 max_loop_iterations_int (struct loop *loop)
3834 widest_int nit;
3835 HOST_WIDE_INT hwi_nit;
3837 if (!max_loop_iterations (loop, &nit))
3838 return -1;
3840 if (!wi::fits_shwi_p (nit))
3841 return -1;
3842 hwi_nit = nit.to_shwi ();
3844 return hwi_nit < 0 ? -1 : hwi_nit;
3847 /* Returns an estimate for the number of executions of statements
3848 in the LOOP. For statements before the loop exit, this exceeds
3849 the number of execution of the latch by one. */
3851 HOST_WIDE_INT
3852 estimated_stmt_executions_int (struct loop *loop)
3854 HOST_WIDE_INT nit = estimated_loop_iterations_int (loop);
3855 HOST_WIDE_INT snit;
3857 if (nit == -1)
3858 return -1;
3860 snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1);
3862 /* If the computation overflows, return -1. */
3863 return snit < 0 ? -1 : snit;
3866 /* Sets NIT to the estimated maximum number of executions of the latch of the
3867 LOOP, plus one. If we have no reliable estimate, the function returns
3868 false, otherwise returns true. */
3870 bool
3871 max_stmt_executions (struct loop *loop, widest_int *nit)
3873 widest_int nit_minus_one;
3875 if (!max_loop_iterations (loop, nit))
3876 return false;
3878 nit_minus_one = *nit;
3880 *nit += 1;
3882 return wi::gtu_p (*nit, nit_minus_one);
3885 /* Sets NIT to the estimated number of executions of the latch of the
3886 LOOP, plus one. If we have no reliable estimate, the function returns
3887 false, otherwise returns true. */
3889 bool
3890 estimated_stmt_executions (struct loop *loop, widest_int *nit)
3892 widest_int nit_minus_one;
3894 if (!estimated_loop_iterations (loop, nit))
3895 return false;
3897 nit_minus_one = *nit;
3899 *nit += 1;
3901 return wi::gtu_p (*nit, nit_minus_one);
3904 /* Records estimates on numbers of iterations of loops. */
3906 void
3907 estimate_numbers_of_iterations (void)
3909 struct loop *loop;
3911 /* We don't want to issue signed overflow warnings while getting
3912 loop iteration estimates. */
3913 fold_defer_overflow_warnings ();
3915 FOR_EACH_LOOP (loop, 0)
3917 estimate_numbers_of_iterations_loop (loop);
3920 fold_undefer_and_ignore_overflow_warnings ();
3923 /* Returns true if statement S1 dominates statement S2. */
3925 bool
3926 stmt_dominates_stmt_p (gimple *s1, gimple *s2)
3928 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
3930 if (!bb1
3931 || s1 == s2)
3932 return true;
3934 if (bb1 == bb2)
3936 gimple_stmt_iterator bsi;
3938 if (gimple_code (s2) == GIMPLE_PHI)
3939 return false;
3941 if (gimple_code (s1) == GIMPLE_PHI)
3942 return true;
3944 for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
3945 if (gsi_stmt (bsi) == s1)
3946 return true;
3948 return false;
3951 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
3954 /* Returns true when we can prove that the number of executions of
3955 STMT in the loop is at most NITER, according to the bound on
3956 the number of executions of the statement NITER_BOUND->stmt recorded in
3957 NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT.
3959 ??? This code can become quite a CPU hog - we can have many bounds,
3960 and large basic block forcing stmt_dominates_stmt_p to be queried
3961 many times on a large basic blocks, so the whole thing is O(n^2)
3962 for scev_probably_wraps_p invocation (that can be done n times).
3964 It would make more sense (and give better answers) to remember BB
3965 bounds computed by discover_iteration_bound_by_body_walk. */
3967 static bool
3968 n_of_executions_at_most (gimple *stmt,
3969 struct nb_iter_bound *niter_bound,
3970 tree niter)
3972 widest_int bound = niter_bound->bound;
3973 tree nit_type = TREE_TYPE (niter), e;
3974 enum tree_code cmp;
3976 gcc_assert (TYPE_UNSIGNED (nit_type));
3978 /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
3979 the number of iterations is small. */
3980 if (!wi::fits_to_tree_p (bound, nit_type))
3981 return false;
3983 /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
3984 times. This means that:
3986 -- if NITER_BOUND->is_exit is true, then everything after
3987 it at most NITER_BOUND->bound times.
3989 -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
3990 is executed, then NITER_BOUND->stmt is executed as well in the same
3991 iteration then STMT is executed at most NITER_BOUND->bound + 1 times.
3993 If we can determine that NITER_BOUND->stmt is always executed
3994 after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
3995 We conclude that if both statements belong to the same
3996 basic block and STMT is before NITER_BOUND->stmt and there are no
3997 statements with side effects in between. */
3999 if (niter_bound->is_exit)
4001 if (stmt == niter_bound->stmt
4002 || !stmt_dominates_stmt_p (niter_bound->stmt, stmt))
4003 return false;
4004 cmp = GE_EXPR;
4006 else
4008 if (!stmt_dominates_stmt_p (niter_bound->stmt, stmt))
4010 gimple_stmt_iterator bsi;
4011 if (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
4012 || gimple_code (stmt) == GIMPLE_PHI
4013 || gimple_code (niter_bound->stmt) == GIMPLE_PHI)
4014 return false;
4016 /* By stmt_dominates_stmt_p we already know that STMT appears
4017 before NITER_BOUND->STMT. Still need to test that the loop
4018 can not be terinated by a side effect in between. */
4019 for (bsi = gsi_for_stmt (stmt); gsi_stmt (bsi) != niter_bound->stmt;
4020 gsi_next (&bsi))
4021 if (gimple_has_side_effects (gsi_stmt (bsi)))
4022 return false;
4023 bound += 1;
4024 if (bound == 0
4025 || !wi::fits_to_tree_p (bound, nit_type))
4026 return false;
4028 cmp = GT_EXPR;
4031 e = fold_binary (cmp, boolean_type_node,
4032 niter, wide_int_to_tree (nit_type, bound));
4033 return e && integer_nonzerop (e);
4036 /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
4038 bool
4039 nowrap_type_p (tree type)
4041 if (INTEGRAL_TYPE_P (type)
4042 && TYPE_OVERFLOW_UNDEFINED (type))
4043 return true;
4045 if (POINTER_TYPE_P (type))
4046 return true;
4048 return false;
4051 /* Return true if we can prove LOOP is exited before evolution of induction
4052 variabled {BASE, STEP} overflows with respect to its type bound. */
4054 static bool
4055 loop_exits_before_overflow (tree base, tree step,
4056 gimple *at_stmt, struct loop *loop)
4058 widest_int niter;
4059 struct control_iv *civ;
4060 struct nb_iter_bound *bound;
4061 tree e, delta, step_abs, unsigned_base;
4062 tree type = TREE_TYPE (step);
4063 tree unsigned_type, valid_niter;
4065 /* Don't issue signed overflow warnings. */
4066 fold_defer_overflow_warnings ();
4068 /* Compute the number of iterations before we reach the bound of the
4069 type, and verify that the loop is exited before this occurs. */
4070 unsigned_type = unsigned_type_for (type);
4071 unsigned_base = fold_convert (unsigned_type, base);
4073 if (tree_int_cst_sign_bit (step))
4075 tree extreme = fold_convert (unsigned_type,
4076 lower_bound_in_type (type, type));
4077 delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
4078 step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
4079 fold_convert (unsigned_type, step));
4081 else
4083 tree extreme = fold_convert (unsigned_type,
4084 upper_bound_in_type (type, type));
4085 delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
4086 step_abs = fold_convert (unsigned_type, step);
4089 valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
4091 estimate_numbers_of_iterations_loop (loop);
4093 if (max_loop_iterations (loop, &niter)
4094 && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
4095 && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
4096 wide_int_to_tree (TREE_TYPE (valid_niter),
4097 niter))) != NULL
4098 && integer_nonzerop (e))
4100 fold_undefer_and_ignore_overflow_warnings ();
4101 return true;
4103 if (at_stmt)
4104 for (bound = loop->bounds; bound; bound = bound->next)
4106 if (n_of_executions_at_most (at_stmt, bound, valid_niter))
4108 fold_undefer_and_ignore_overflow_warnings ();
4109 return true;
4112 fold_undefer_and_ignore_overflow_warnings ();
4114 /* Try to prove loop is exited before {base, step} overflows with the
4115 help of analyzed loop control IV. This is done only for IVs with
4116 constant step because otherwise we don't have the information. */
4117 if (TREE_CODE (step) == INTEGER_CST)
4119 tree stop = (TREE_CODE (base) == SSA_NAME) ? base : NULL;
4121 for (civ = loop->control_ivs; civ; civ = civ->next)
4123 enum tree_code code;
4124 tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
4126 /* Have to consider type difference because operand_equal_p ignores
4127 that for constants. */
4128 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
4129 || element_precision (type) != element_precision (civ_type))
4130 continue;
4132 /* Only consider control IV with same step. */
4133 if (!operand_equal_p (step, civ->step, 0))
4134 continue;
4136 /* Done proving if this is a no-overflow control IV. */
4137 if (operand_equal_p (base, civ->base, 0))
4138 return true;
4140 /* If this is a before stepping control IV, in other words, we have
4142 {civ_base, step} = {base + step, step}
4144 Because civ {base + step, step} doesn't overflow during loop
4145 iterations, {base, step} will not overflow if we can prove the
4146 operation "base + step" does not overflow. Specifically, we try
4147 to prove below conditions are satisfied:
4149 base <= UPPER_BOUND (type) - step ;;step > 0
4150 base >= LOWER_BOUND (type) - step ;;step < 0
4152 by proving the reverse conditions are false using loop's initial
4153 condition. */
4154 if (POINTER_TYPE_P (TREE_TYPE (base)))
4155 code = POINTER_PLUS_EXPR;
4156 else
4157 code = PLUS_EXPR;
4159 stepped = fold_build2 (code, TREE_TYPE (base), base, step);
4160 if (operand_equal_p (stepped, civ->base, 0))
4162 if (tree_int_cst_sign_bit (step))
4164 code = LT_EXPR;
4165 extreme = lower_bound_in_type (type, type);
4167 else
4169 code = GT_EXPR;
4170 extreme = upper_bound_in_type (type, type);
4172 extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
4173 e = fold_build2 (code, boolean_type_node, base, extreme);
4174 e = simplify_using_initial_conditions (loop, e, stop);
4175 if (integer_zerop (e))
4176 return true;
4181 return false;
4184 /* Return false only when the induction variable BASE + STEP * I is
4185 known to not overflow: i.e. when the number of iterations is small
4186 enough with respect to the step and initial condition in order to
4187 keep the evolution confined in TYPEs bounds. Return true when the
4188 iv is known to overflow or when the property is not computable.
4190 USE_OVERFLOW_SEMANTICS is true if this function should assume that
4191 the rules for overflow of the given language apply (e.g., that signed
4192 arithmetics in C does not overflow). */
4194 bool
4195 scev_probably_wraps_p (tree base, tree step,
4196 gimple *at_stmt, struct loop *loop,
4197 bool use_overflow_semantics)
4199 /* FIXME: We really need something like
4200 http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
4202 We used to test for the following situation that frequently appears
4203 during address arithmetics:
4205 D.1621_13 = (long unsigned intD.4) D.1620_12;
4206 D.1622_14 = D.1621_13 * 8;
4207 D.1623_15 = (doubleD.29 *) D.1622_14;
4209 And derived that the sequence corresponding to D_14
4210 can be proved to not wrap because it is used for computing a
4211 memory access; however, this is not really the case -- for example,
4212 if D_12 = (unsigned char) [254,+,1], then D_14 has values
4213 2032, 2040, 0, 8, ..., but the code is still legal. */
4215 if (chrec_contains_undetermined (base)
4216 || chrec_contains_undetermined (step))
4217 return true;
4219 if (integer_zerop (step))
4220 return false;
4222 /* If we can use the fact that signed and pointer arithmetics does not
4223 wrap, we are done. */
4224 if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base)))
4225 return false;
4227 /* To be able to use estimates on number of iterations of the loop,
4228 we must have an upper bound on the absolute value of the step. */
4229 if (TREE_CODE (step) != INTEGER_CST)
4230 return true;
4232 if (loop_exits_before_overflow (base, step, at_stmt, loop))
4233 return false;
4235 /* At this point we still don't have a proof that the iv does not
4236 overflow: give up. */
4237 return true;
4240 /* Frees the information on upper bounds on numbers of iterations of LOOP. */
4242 void
4243 free_numbers_of_iterations_estimates_loop (struct loop *loop)
4245 struct control_iv *civ;
4246 struct nb_iter_bound *bound;
4248 loop->nb_iterations = NULL;
4249 loop->estimate_state = EST_NOT_COMPUTED;
4250 for (bound = loop->bounds; bound;)
4252 struct nb_iter_bound *next = bound->next;
4253 ggc_free (bound);
4254 bound = next;
4256 loop->bounds = NULL;
4258 for (civ = loop->control_ivs; civ;)
4260 struct control_iv *next = civ->next;
4261 ggc_free (civ);
4262 civ = next;
4264 loop->control_ivs = NULL;
4267 /* Frees the information on upper bounds on numbers of iterations of loops. */
4269 void
4270 free_numbers_of_iterations_estimates (void)
4272 struct loop *loop;
4274 FOR_EACH_LOOP (loop, 0)
4276 free_numbers_of_iterations_estimates_loop (loop);
4280 /* Substitute value VAL for ssa name NAME inside expressions held
4281 at LOOP. */
4283 void
4284 substitute_in_loop_info (struct loop *loop, tree name, tree val)
4286 loop->nb_iterations = simplify_replace_tree (loop->nb_iterations, name, val);