PR tree-optimization/34114
[official-gcc.git] / gcc / tree-ssa-loop-niter.c
blobc740ffa9d6a88a9454094d2369393e616d547d35
1 /* Functions to determine/estimate number of iterations of a loop.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "diagnostic-core.h"
31 #include "stor-layout.h"
32 #include "fold-const.h"
33 #include "calls.h"
34 #include "intl.h"
35 #include "gimplify.h"
36 #include "gimple-iterator.h"
37 #include "tree-cfg.h"
38 #include "tree-ssa-loop-ivopts.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop.h"
41 #include "cfgloop.h"
42 #include "tree-chrec.h"
43 #include "tree-scalar-evolution.h"
44 #include "params.h"
47 /* The maximum number of dominator BBs we search for conditions
48 of loop header copies we use for simplifying a conditional
49 expression. */
50 #define MAX_DOMINATORS_TO_WALK 8
54 Analysis of number of iterations of an affine exit test.
58 /* Bounds on some value, BELOW <= X <= UP. */
60 struct bounds
62 mpz_t below, up;
66 /* Splits expression EXPR to a variable part VAR and constant OFFSET. */
68 static void
69 split_to_var_and_offset (tree expr, tree *var, mpz_t offset)
71 tree type = TREE_TYPE (expr);
72 tree op0, op1;
73 bool negate = false;
75 *var = expr;
76 mpz_set_ui (offset, 0);
78 switch (TREE_CODE (expr))
80 case MINUS_EXPR:
81 negate = true;
82 /* Fallthru. */
84 case PLUS_EXPR:
85 case POINTER_PLUS_EXPR:
86 op0 = TREE_OPERAND (expr, 0);
87 op1 = TREE_OPERAND (expr, 1);
89 if (TREE_CODE (op1) != INTEGER_CST)
90 break;
92 *var = op0;
93 /* Always sign extend the offset. */
94 wi::to_mpz (op1, offset, SIGNED);
95 if (negate)
96 mpz_neg (offset, offset);
97 break;
99 case INTEGER_CST:
100 *var = build_int_cst_type (type, 0);
101 wi::to_mpz (expr, offset, TYPE_SIGN (type));
102 break;
104 default:
105 break;
109 /* From condition C0 CMP C1 derives information regarding the value range
110 of VAR, which is of TYPE. Results are stored in to BELOW and UP. */
112 static void
113 refine_value_range_using_guard (tree type, tree var,
114 tree c0, enum tree_code cmp, tree c1,
115 mpz_t below, mpz_t up)
117 tree varc0, varc1, ctype;
118 mpz_t offc0, offc1;
119 mpz_t mint, maxt, minc1, maxc1;
120 wide_int minv, maxv;
121 bool no_wrap = nowrap_type_p (type);
122 bool c0_ok, c1_ok;
123 signop sgn = TYPE_SIGN (type);
125 switch (cmp)
127 case LT_EXPR:
128 case LE_EXPR:
129 case GT_EXPR:
130 case GE_EXPR:
131 STRIP_SIGN_NOPS (c0);
132 STRIP_SIGN_NOPS (c1);
133 ctype = TREE_TYPE (c0);
134 if (!useless_type_conversion_p (ctype, type))
135 return;
137 break;
139 case EQ_EXPR:
140 /* We could derive quite precise information from EQ_EXPR, however,
141 such a guard is unlikely to appear, so we do not bother with
142 handling it. */
143 return;
145 case NE_EXPR:
146 /* NE_EXPR comparisons do not contain much of useful information,
147 except for cases of comparing with bounds. */
148 if (TREE_CODE (c1) != INTEGER_CST
149 || !INTEGRAL_TYPE_P (type))
150 return;
152 /* Ensure that the condition speaks about an expression in the same
153 type as X and Y. */
154 ctype = TREE_TYPE (c0);
155 if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
156 return;
157 c0 = fold_convert (type, c0);
158 c1 = fold_convert (type, c1);
160 if (operand_equal_p (var, c0, 0))
162 mpz_t valc1;
164 /* Case of comparing VAR with its below/up bounds. */
165 mpz_init (valc1);
166 wi::to_mpz (c1, valc1, TYPE_SIGN (type));
167 if (mpz_cmp (valc1, below) == 0)
168 cmp = GT_EXPR;
169 if (mpz_cmp (valc1, up) == 0)
170 cmp = LT_EXPR;
172 mpz_clear (valc1);
174 else
176 /* Case of comparing with the bounds of the type. */
177 wide_int min = wi::min_value (type);
178 wide_int max = wi::max_value (type);
180 if (wi::eq_p (c1, min))
181 cmp = GT_EXPR;
182 if (wi::eq_p (c1, max))
183 cmp = LT_EXPR;
186 /* Quick return if no useful information. */
187 if (cmp == NE_EXPR)
188 return;
190 break;
192 default:
193 return;
196 mpz_init (offc0);
197 mpz_init (offc1);
198 split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
199 split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
201 /* We are only interested in comparisons of expressions based on VAR. */
202 if (operand_equal_p (var, varc1, 0))
204 std::swap (varc0, varc1);
205 mpz_swap (offc0, offc1);
206 cmp = swap_tree_comparison (cmp);
208 else if (!operand_equal_p (var, varc0, 0))
210 mpz_clear (offc0);
211 mpz_clear (offc1);
212 return;
215 mpz_init (mint);
216 mpz_init (maxt);
217 get_type_static_bounds (type, mint, maxt);
218 mpz_init (minc1);
219 mpz_init (maxc1);
220 /* Setup range information for varc1. */
221 if (integer_zerop (varc1))
223 wi::to_mpz (integer_zero_node, minc1, TYPE_SIGN (type));
224 wi::to_mpz (integer_zero_node, maxc1, TYPE_SIGN (type));
226 else if (TREE_CODE (varc1) == SSA_NAME
227 && INTEGRAL_TYPE_P (type)
228 && get_range_info (varc1, &minv, &maxv) == VR_RANGE)
230 gcc_assert (wi::le_p (minv, maxv, sgn));
231 wi::to_mpz (minv, minc1, sgn);
232 wi::to_mpz (maxv, maxc1, sgn);
234 else
236 mpz_set (minc1, mint);
237 mpz_set (maxc1, maxt);
240 /* Compute valid range information for varc1 + offc1. Note nothing
241 useful can be derived if it overflows or underflows. Overflow or
242 underflow could happen when:
244 offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
245 offc1 < 0 && varc1 + offc1 < MIN_VAL (type). */
246 mpz_add (minc1, minc1, offc1);
247 mpz_add (maxc1, maxc1, offc1);
248 c1_ok = (no_wrap
249 || mpz_sgn (offc1) == 0
250 || (mpz_sgn (offc1) < 0 && mpz_cmp (minc1, mint) >= 0)
251 || (mpz_sgn (offc1) > 0 && mpz_cmp (maxc1, maxt) <= 0));
252 if (!c1_ok)
253 goto end;
255 if (mpz_cmp (minc1, mint) < 0)
256 mpz_set (minc1, mint);
257 if (mpz_cmp (maxc1, maxt) > 0)
258 mpz_set (maxc1, maxt);
260 if (cmp == LT_EXPR)
262 cmp = LE_EXPR;
263 mpz_sub_ui (maxc1, maxc1, 1);
265 if (cmp == GT_EXPR)
267 cmp = GE_EXPR;
268 mpz_add_ui (minc1, minc1, 1);
271 /* Compute range information for varc0. If there is no overflow,
272 the condition implied that
274 (varc0) cmp (varc1 + offc1 - offc0)
276 We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
277 or the below bound if cmp is GE_EXPR.
279 To prove there is no overflow/underflow, we need to check below
280 four cases:
281 1) cmp == LE_EXPR && offc0 > 0
283 (varc0 + offc0) doesn't overflow
284 && (varc1 + offc1 - offc0) doesn't underflow
286 2) cmp == LE_EXPR && offc0 < 0
288 (varc0 + offc0) doesn't underflow
289 && (varc1 + offc1 - offc0) doesn't overfloe
291 In this case, (varc0 + offc0) will never underflow if we can
292 prove (varc1 + offc1 - offc0) doesn't overflow.
294 3) cmp == GE_EXPR && offc0 < 0
296 (varc0 + offc0) doesn't underflow
297 && (varc1 + offc1 - offc0) doesn't overflow
299 4) cmp == GE_EXPR && offc0 > 0
301 (varc0 + offc0) doesn't overflow
302 && (varc1 + offc1 - offc0) doesn't underflow
304 In this case, (varc0 + offc0) will never overflow if we can
305 prove (varc1 + offc1 - offc0) doesn't underflow.
307 Note we only handle case 2 and 4 in below code. */
309 mpz_sub (minc1, minc1, offc0);
310 mpz_sub (maxc1, maxc1, offc0);
311 c0_ok = (no_wrap
312 || mpz_sgn (offc0) == 0
313 || (cmp == LE_EXPR
314 && mpz_sgn (offc0) < 0 && mpz_cmp (maxc1, maxt) <= 0)
315 || (cmp == GE_EXPR
316 && mpz_sgn (offc0) > 0 && mpz_cmp (minc1, mint) >= 0));
317 if (!c0_ok)
318 goto end;
320 if (cmp == LE_EXPR)
322 if (mpz_cmp (up, maxc1) > 0)
323 mpz_set (up, maxc1);
325 else
327 if (mpz_cmp (below, minc1) < 0)
328 mpz_set (below, minc1);
331 end:
332 mpz_clear (mint);
333 mpz_clear (maxt);
334 mpz_clear (minc1);
335 mpz_clear (maxc1);
336 mpz_clear (offc0);
337 mpz_clear (offc1);
340 /* Stores estimate on the minimum/maximum value of the expression VAR + OFF
341 in TYPE to MIN and MAX. */
343 static void
344 determine_value_range (struct loop *loop, tree type, tree var, mpz_t off,
345 mpz_t min, mpz_t max)
347 int cnt = 0;
348 mpz_t minm, maxm;
349 basic_block bb;
350 wide_int minv, maxv;
351 enum value_range_type rtype = VR_VARYING;
353 /* If the expression is a constant, we know its value exactly. */
354 if (integer_zerop (var))
356 mpz_set (min, off);
357 mpz_set (max, off);
358 return;
361 get_type_static_bounds (type, min, max);
363 /* See if we have some range info from VRP. */
364 if (TREE_CODE (var) == SSA_NAME && INTEGRAL_TYPE_P (type))
366 edge e = loop_preheader_edge (loop);
367 signop sgn = TYPE_SIGN (type);
368 gphi_iterator gsi;
370 /* Either for VAR itself... */
371 rtype = get_range_info (var, &minv, &maxv);
372 /* Or for PHI results in loop->header where VAR is used as
373 PHI argument from the loop preheader edge. */
374 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
376 gphi *phi = gsi.phi ();
377 wide_int minc, maxc;
378 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == var
379 && (get_range_info (gimple_phi_result (phi), &minc, &maxc)
380 == VR_RANGE))
382 if (rtype != VR_RANGE)
384 rtype = VR_RANGE;
385 minv = minc;
386 maxv = maxc;
388 else
390 minv = wi::max (minv, minc, sgn);
391 maxv = wi::min (maxv, maxc, sgn);
392 /* If the PHI result range are inconsistent with
393 the VAR range, give up on looking at the PHI
394 results. This can happen if VR_UNDEFINED is
395 involved. */
396 if (wi::gt_p (minv, maxv, sgn))
398 rtype = get_range_info (var, &minv, &maxv);
399 break;
404 mpz_init (minm);
405 mpz_init (maxm);
406 if (rtype != VR_RANGE)
408 mpz_set (minm, min);
409 mpz_set (maxm, max);
411 else
413 gcc_assert (wi::le_p (minv, maxv, sgn));
414 wi::to_mpz (minv, minm, sgn);
415 wi::to_mpz (maxv, maxm, sgn);
417 /* Now walk the dominators of the loop header and use the entry
418 guards to refine the estimates. */
419 for (bb = loop->header;
420 bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
421 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
423 edge e;
424 tree c0, c1;
425 gimple *cond;
426 enum tree_code cmp;
428 if (!single_pred_p (bb))
429 continue;
430 e = single_pred_edge (bb);
432 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
433 continue;
435 cond = last_stmt (e->src);
436 c0 = gimple_cond_lhs (cond);
437 cmp = gimple_cond_code (cond);
438 c1 = gimple_cond_rhs (cond);
440 if (e->flags & EDGE_FALSE_VALUE)
441 cmp = invert_tree_comparison (cmp, false);
443 refine_value_range_using_guard (type, var, c0, cmp, c1, minm, maxm);
444 ++cnt;
447 mpz_add (minm, minm, off);
448 mpz_add (maxm, maxm, off);
449 /* If the computation may not wrap or off is zero, then this
450 is always fine. If off is negative and minv + off isn't
451 smaller than type's minimum, or off is positive and
452 maxv + off isn't bigger than type's maximum, use the more
453 precise range too. */
454 if (nowrap_type_p (type)
455 || mpz_sgn (off) == 0
456 || (mpz_sgn (off) < 0 && mpz_cmp (minm, min) >= 0)
457 || (mpz_sgn (off) > 0 && mpz_cmp (maxm, max) <= 0))
459 mpz_set (min, minm);
460 mpz_set (max, maxm);
461 mpz_clear (minm);
462 mpz_clear (maxm);
463 return;
465 mpz_clear (minm);
466 mpz_clear (maxm);
469 /* If the computation may wrap, we know nothing about the value, except for
470 the range of the type. */
471 if (!nowrap_type_p (type))
472 return;
474 /* Since the addition of OFF does not wrap, if OFF is positive, then we may
475 add it to MIN, otherwise to MAX. */
476 if (mpz_sgn (off) < 0)
477 mpz_add (max, max, off);
478 else
479 mpz_add (min, min, off);
482 /* Stores the bounds on the difference of the values of the expressions
483 (var + X) and (var + Y), computed in TYPE, to BNDS. */
485 static void
486 bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y,
487 bounds *bnds)
489 int rel = mpz_cmp (x, y);
490 bool may_wrap = !nowrap_type_p (type);
491 mpz_t m;
493 /* If X == Y, then the expressions are always equal.
494 If X > Y, there are the following possibilities:
495 a) neither of var + X and var + Y overflow or underflow, or both of
496 them do. Then their difference is X - Y.
497 b) var + X overflows, and var + Y does not. Then the values of the
498 expressions are var + X - M and var + Y, where M is the range of
499 the type, and their difference is X - Y - M.
500 c) var + Y underflows and var + X does not. Their difference again
501 is M - X + Y.
502 Therefore, if the arithmetics in type does not overflow, then the
503 bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y)
504 Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
505 (X - Y, X - Y + M). */
507 if (rel == 0)
509 mpz_set_ui (bnds->below, 0);
510 mpz_set_ui (bnds->up, 0);
511 return;
514 mpz_init (m);
515 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED);
516 mpz_add_ui (m, m, 1);
517 mpz_sub (bnds->up, x, y);
518 mpz_set (bnds->below, bnds->up);
520 if (may_wrap)
522 if (rel > 0)
523 mpz_sub (bnds->below, bnds->below, m);
524 else
525 mpz_add (bnds->up, bnds->up, m);
528 mpz_clear (m);
531 /* From condition C0 CMP C1 derives information regarding the
532 difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
533 and stores it to BNDS. */
535 static void
536 refine_bounds_using_guard (tree type, tree varx, mpz_t offx,
537 tree vary, mpz_t offy,
538 tree c0, enum tree_code cmp, tree c1,
539 bounds *bnds)
541 tree varc0, varc1, ctype;
542 mpz_t offc0, offc1, loffx, loffy, bnd;
543 bool lbound = false;
544 bool no_wrap = nowrap_type_p (type);
545 bool x_ok, y_ok;
547 switch (cmp)
549 case LT_EXPR:
550 case LE_EXPR:
551 case GT_EXPR:
552 case GE_EXPR:
553 STRIP_SIGN_NOPS (c0);
554 STRIP_SIGN_NOPS (c1);
555 ctype = TREE_TYPE (c0);
556 if (!useless_type_conversion_p (ctype, type))
557 return;
559 break;
561 case EQ_EXPR:
562 /* We could derive quite precise information from EQ_EXPR, however, such
563 a guard is unlikely to appear, so we do not bother with handling
564 it. */
565 return;
567 case NE_EXPR:
568 /* NE_EXPR comparisons do not contain much of useful information, except for
569 special case of comparing with the bounds of the type. */
570 if (TREE_CODE (c1) != INTEGER_CST
571 || !INTEGRAL_TYPE_P (type))
572 return;
574 /* Ensure that the condition speaks about an expression in the same type
575 as X and Y. */
576 ctype = TREE_TYPE (c0);
577 if (TYPE_PRECISION (ctype) != TYPE_PRECISION (type))
578 return;
579 c0 = fold_convert (type, c0);
580 c1 = fold_convert (type, c1);
582 if (TYPE_MIN_VALUE (type)
583 && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
585 cmp = GT_EXPR;
586 break;
588 if (TYPE_MAX_VALUE (type)
589 && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
591 cmp = LT_EXPR;
592 break;
595 return;
596 default:
597 return;
600 mpz_init (offc0);
601 mpz_init (offc1);
602 split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
603 split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
605 /* We are only interested in comparisons of expressions based on VARX and
606 VARY. TODO -- we might also be able to derive some bounds from
607 expressions containing just one of the variables. */
609 if (operand_equal_p (varx, varc1, 0))
611 std::swap (varc0, varc1);
612 mpz_swap (offc0, offc1);
613 cmp = swap_tree_comparison (cmp);
616 if (!operand_equal_p (varx, varc0, 0)
617 || !operand_equal_p (vary, varc1, 0))
618 goto end;
620 mpz_init_set (loffx, offx);
621 mpz_init_set (loffy, offy);
623 if (cmp == GT_EXPR || cmp == GE_EXPR)
625 std::swap (varx, vary);
626 mpz_swap (offc0, offc1);
627 mpz_swap (loffx, loffy);
628 cmp = swap_tree_comparison (cmp);
629 lbound = true;
632 /* If there is no overflow, the condition implies that
634 (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).
636 The overflows and underflows may complicate things a bit; each
637 overflow decreases the appropriate offset by M, and underflow
638 increases it by M. The above inequality would not necessarily be
639 true if
641 -- VARX + OFFX underflows and VARX + OFFC0 does not, or
642 VARX + OFFC0 overflows, but VARX + OFFX does not.
643 This may only happen if OFFX < OFFC0.
644 -- VARY + OFFY overflows and VARY + OFFC1 does not, or
645 VARY + OFFC1 underflows and VARY + OFFY does not.
646 This may only happen if OFFY > OFFC1. */
648 if (no_wrap)
650 x_ok = true;
651 y_ok = true;
653 else
655 x_ok = (integer_zerop (varx)
656 || mpz_cmp (loffx, offc0) >= 0);
657 y_ok = (integer_zerop (vary)
658 || mpz_cmp (loffy, offc1) <= 0);
661 if (x_ok && y_ok)
663 mpz_init (bnd);
664 mpz_sub (bnd, loffx, loffy);
665 mpz_add (bnd, bnd, offc1);
666 mpz_sub (bnd, bnd, offc0);
668 if (cmp == LT_EXPR)
669 mpz_sub_ui (bnd, bnd, 1);
671 if (lbound)
673 mpz_neg (bnd, bnd);
674 if (mpz_cmp (bnds->below, bnd) < 0)
675 mpz_set (bnds->below, bnd);
677 else
679 if (mpz_cmp (bnd, bnds->up) < 0)
680 mpz_set (bnds->up, bnd);
682 mpz_clear (bnd);
685 mpz_clear (loffx);
686 mpz_clear (loffy);
687 end:
688 mpz_clear (offc0);
689 mpz_clear (offc1);
692 /* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
693 The subtraction is considered to be performed in arbitrary precision,
694 without overflows.
696 We do not attempt to be too clever regarding the value ranges of X and
697 Y; most of the time, they are just integers or ssa names offsetted by
698 integer. However, we try to use the information contained in the
699 comparisons before the loop (usually created by loop header copying). */
701 static void
702 bound_difference (struct loop *loop, tree x, tree y, bounds *bnds)
704 tree type = TREE_TYPE (x);
705 tree varx, vary;
706 mpz_t offx, offy;
707 mpz_t minx, maxx, miny, maxy;
708 int cnt = 0;
709 edge e;
710 basic_block bb;
711 tree c0, c1;
712 gimple *cond;
713 enum tree_code cmp;
715 /* Get rid of unnecessary casts, but preserve the value of
716 the expressions. */
717 STRIP_SIGN_NOPS (x);
718 STRIP_SIGN_NOPS (y);
720 mpz_init (bnds->below);
721 mpz_init (bnds->up);
722 mpz_init (offx);
723 mpz_init (offy);
724 split_to_var_and_offset (x, &varx, offx);
725 split_to_var_and_offset (y, &vary, offy);
727 if (!integer_zerop (varx)
728 && operand_equal_p (varx, vary, 0))
730 /* Special case VARX == VARY -- we just need to compare the
731 offsets. The matters are a bit more complicated in the
732 case addition of offsets may wrap. */
733 bound_difference_of_offsetted_base (type, offx, offy, bnds);
735 else
737 /* Otherwise, use the value ranges to determine the initial
738 estimates on below and up. */
739 mpz_init (minx);
740 mpz_init (maxx);
741 mpz_init (miny);
742 mpz_init (maxy);
743 determine_value_range (loop, type, varx, offx, minx, maxx);
744 determine_value_range (loop, type, vary, offy, miny, maxy);
746 mpz_sub (bnds->below, minx, maxy);
747 mpz_sub (bnds->up, maxx, miny);
748 mpz_clear (minx);
749 mpz_clear (maxx);
750 mpz_clear (miny);
751 mpz_clear (maxy);
754 /* If both X and Y are constants, we cannot get any more precise. */
755 if (integer_zerop (varx) && integer_zerop (vary))
756 goto end;
758 /* Now walk the dominators of the loop header and use the entry
759 guards to refine the estimates. */
760 for (bb = loop->header;
761 bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
762 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
764 if (!single_pred_p (bb))
765 continue;
766 e = single_pred_edge (bb);
768 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
769 continue;
771 cond = last_stmt (e->src);
772 c0 = gimple_cond_lhs (cond);
773 cmp = gimple_cond_code (cond);
774 c1 = gimple_cond_rhs (cond);
776 if (e->flags & EDGE_FALSE_VALUE)
777 cmp = invert_tree_comparison (cmp, false);
779 refine_bounds_using_guard (type, varx, offx, vary, offy,
780 c0, cmp, c1, bnds);
781 ++cnt;
784 end:
785 mpz_clear (offx);
786 mpz_clear (offy);
789 /* Update the bounds in BNDS that restrict the value of X to the bounds
790 that restrict the value of X + DELTA. X can be obtained as a
791 difference of two values in TYPE. */
793 static void
794 bounds_add (bounds *bnds, const widest_int &delta, tree type)
796 mpz_t mdelta, max;
798 mpz_init (mdelta);
799 wi::to_mpz (delta, mdelta, SIGNED);
801 mpz_init (max);
802 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
804 mpz_add (bnds->up, bnds->up, mdelta);
805 mpz_add (bnds->below, bnds->below, mdelta);
807 if (mpz_cmp (bnds->up, max) > 0)
808 mpz_set (bnds->up, max);
810 mpz_neg (max, max);
811 if (mpz_cmp (bnds->below, max) < 0)
812 mpz_set (bnds->below, max);
814 mpz_clear (mdelta);
815 mpz_clear (max);
818 /* Update the bounds in BNDS that restrict the value of X to the bounds
819 that restrict the value of -X. */
821 static void
822 bounds_negate (bounds *bnds)
824 mpz_t tmp;
826 mpz_init_set (tmp, bnds->up);
827 mpz_neg (bnds->up, bnds->below);
828 mpz_neg (bnds->below, tmp);
829 mpz_clear (tmp);
832 /* Returns inverse of X modulo 2^s, where MASK = 2^s-1. */
834 static tree
835 inverse (tree x, tree mask)
837 tree type = TREE_TYPE (x);
838 tree rslt;
839 unsigned ctr = tree_floor_log2 (mask);
841 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT)
843 unsigned HOST_WIDE_INT ix;
844 unsigned HOST_WIDE_INT imask;
845 unsigned HOST_WIDE_INT irslt = 1;
847 gcc_assert (cst_and_fits_in_hwi (x));
848 gcc_assert (cst_and_fits_in_hwi (mask));
850 ix = int_cst_value (x);
851 imask = int_cst_value (mask);
853 for (; ctr; ctr--)
855 irslt *= ix;
856 ix *= ix;
858 irslt &= imask;
860 rslt = build_int_cst_type (type, irslt);
862 else
864 rslt = build_int_cst (type, 1);
865 for (; ctr; ctr--)
867 rslt = int_const_binop (MULT_EXPR, rslt, x);
868 x = int_const_binop (MULT_EXPR, x, x);
870 rslt = int_const_binop (BIT_AND_EXPR, rslt, mask);
873 return rslt;
876 /* Derives the upper bound BND on the number of executions of loop with exit
877 condition S * i <> C. If NO_OVERFLOW is true, then the control variable of
878 the loop does not overflow. EXIT_MUST_BE_TAKEN is true if we are guaranteed
879 that the loop ends through this exit, i.e., the induction variable ever
880 reaches the value of C.
882 The value C is equal to final - base, where final and base are the final and
883 initial value of the actual induction variable in the analysed loop. BNDS
884 bounds the value of this difference when computed in signed type with
885 unbounded range, while the computation of C is performed in an unsigned
886 type with the range matching the range of the type of the induction variable.
887 In particular, BNDS.up contains an upper bound on C in the following cases:
888 -- if the iv must reach its final value without overflow, i.e., if
889 NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
890 -- if final >= base, which we know to hold when BNDS.below >= 0. */
892 static void
893 number_of_iterations_ne_max (mpz_t bnd, bool no_overflow, tree c, tree s,
894 bounds *bnds, bool exit_must_be_taken)
896 widest_int max;
897 mpz_t d;
898 tree type = TREE_TYPE (c);
899 bool bnds_u_valid = ((no_overflow && exit_must_be_taken)
900 || mpz_sgn (bnds->below) >= 0);
902 if (integer_onep (s)
903 || (TREE_CODE (c) == INTEGER_CST
904 && TREE_CODE (s) == INTEGER_CST
905 && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0)
906 || (TYPE_OVERFLOW_UNDEFINED (type)
907 && multiple_of_p (type, c, s)))
909 /* If C is an exact multiple of S, then its value will be reached before
910 the induction variable overflows (unless the loop is exited in some
911 other way before). Note that the actual induction variable in the
912 loop (which ranges from base to final instead of from 0 to C) may
913 overflow, in which case BNDS.up will not be giving a correct upper
914 bound on C; thus, BNDS_U_VALID had to be computed in advance. */
915 no_overflow = true;
916 exit_must_be_taken = true;
919 /* If the induction variable can overflow, the number of iterations is at
920 most the period of the control variable (or infinite, but in that case
921 the whole # of iterations analysis will fail). */
922 if (!no_overflow)
924 max = wi::mask <widest_int> (TYPE_PRECISION (type) - wi::ctz (s), false);
925 wi::to_mpz (max, bnd, UNSIGNED);
926 return;
929 /* Now we know that the induction variable does not overflow, so the loop
930 iterates at most (range of type / S) times. */
931 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), bnd, UNSIGNED);
933 /* If the induction variable is guaranteed to reach the value of C before
934 overflow, ... */
935 if (exit_must_be_taken)
937 /* ... then we can strengthen this to C / S, and possibly we can use
938 the upper bound on C given by BNDS. */
939 if (TREE_CODE (c) == INTEGER_CST)
940 wi::to_mpz (c, bnd, UNSIGNED);
941 else if (bnds_u_valid)
942 mpz_set (bnd, bnds->up);
945 mpz_init (d);
946 wi::to_mpz (s, d, UNSIGNED);
947 mpz_fdiv_q (bnd, bnd, d);
948 mpz_clear (d);
951 /* Determines number of iterations of loop whose ending condition
952 is IV <> FINAL. TYPE is the type of the iv. The number of
953 iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
954 we know that the exit must be taken eventually, i.e., that the IV
955 ever reaches the value FINAL (we derived this earlier, and possibly set
956 NITER->assumptions to make sure this is the case). BNDS contains the
957 bounds on the difference FINAL - IV->base. */
959 static bool
960 number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv,
961 tree final, struct tree_niter_desc *niter,
962 bool exit_must_be_taken, bounds *bnds)
964 tree niter_type = unsigned_type_for (type);
965 tree s, c, d, bits, assumption, tmp, bound;
966 mpz_t max;
968 niter->control = *iv;
969 niter->bound = final;
970 niter->cmp = NE_EXPR;
972 /* Rearrange the terms so that we get inequality S * i <> C, with S
973 positive. Also cast everything to the unsigned type. If IV does
974 not overflow, BNDS bounds the value of C. Also, this is the
975 case if the computation |FINAL - IV->base| does not overflow, i.e.,
976 if BNDS->below in the result is nonnegative. */
977 if (tree_int_cst_sign_bit (iv->step))
979 s = fold_convert (niter_type,
980 fold_build1 (NEGATE_EXPR, type, iv->step));
981 c = fold_build2 (MINUS_EXPR, niter_type,
982 fold_convert (niter_type, iv->base),
983 fold_convert (niter_type, final));
984 bounds_negate (bnds);
986 else
988 s = fold_convert (niter_type, iv->step);
989 c = fold_build2 (MINUS_EXPR, niter_type,
990 fold_convert (niter_type, final),
991 fold_convert (niter_type, iv->base));
994 mpz_init (max);
995 number_of_iterations_ne_max (max, iv->no_overflow, c, s, bnds,
996 exit_must_be_taken);
997 niter->max = widest_int::from (wi::from_mpz (niter_type, max, false),
998 TYPE_SIGN (niter_type));
999 mpz_clear (max);
1001 /* Compute no-overflow information for the control iv. This can be
1002 proven when below two conditions hold.
1004 1) |FINAL - base| is an exact multiple of step.
1005 2) IV evaluates toward FINAL at beginning, i.e:
1007 base <= FINAL ; step > 0
1008 base >= FINAL ; step < 0
1010 Note the first condition holds, the second can be then relaxed
1011 to below condition.
1013 base - step < FINAL ; step > 0
1014 && base - step doesn't underflow
1015 base - step > FINAL ; step < 0
1016 && base - step doesn't overflow
1018 The relaxation is important because after pass loop-ch, loop
1019 with exit condition (IV != FINAL) will usually be guarded by
1020 pre-condition (IV.base - IV.step != FINAL). Please refer to
1021 PR34114 as an example.
1023 Also note, for NE_EXPR, base equals to FINAL is a special case, in
1024 which the loop exits immediately, and the iv does not overflow. */
1025 if (!niter->control.no_overflow
1026 && (integer_onep (s) || multiple_of_p (type, c, s)))
1028 tree t, cond, relaxed_cond = boolean_false_node;
1030 if (tree_int_cst_sign_bit (iv->step))
1032 cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final);
1033 if (TREE_CODE (type) == INTEGER_TYPE)
1035 /* Only when base - step doesn't overflow. */
1036 t = TYPE_MAX_VALUE (type);
1037 t = fold_build2 (PLUS_EXPR, type, t, iv->step);
1038 t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base);
1039 if (integer_nonzerop (t))
1041 t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
1042 relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node,
1043 t, final);
1047 else
1049 cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final);
1050 if (TREE_CODE (type) == INTEGER_TYPE)
1052 /* Only when base - step doesn't underflow. */
1053 t = TYPE_MIN_VALUE (type);
1054 t = fold_build2 (PLUS_EXPR, type, t, iv->step);
1055 t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base);
1056 if (integer_nonzerop (t))
1058 t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
1059 relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node,
1060 t, final);
1065 t = simplify_using_initial_conditions (loop, cond);
1066 if (!t || !integer_onep (t))
1067 t = simplify_using_initial_conditions (loop, relaxed_cond);
1069 if (t && integer_onep (t))
1070 niter->control.no_overflow = true;
1073 /* First the trivial cases -- when the step is 1. */
1074 if (integer_onep (s))
1076 niter->niter = c;
1077 return true;
1079 if (niter->control.no_overflow && multiple_of_p (type, c, s))
1081 niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, c, s);
1082 return true;
1085 /* Let nsd (step, size of mode) = d. If d does not divide c, the loop
1086 is infinite. Otherwise, the number of iterations is
1087 (inverse(s/d) * (c/d)) mod (size of mode/d). */
1088 bits = num_ending_zeros (s);
1089 bound = build_low_bits_mask (niter_type,
1090 (TYPE_PRECISION (niter_type)
1091 - tree_to_uhwi (bits)));
1093 d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
1094 build_int_cst (niter_type, 1), bits);
1095 s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);
1097 if (!exit_must_be_taken)
1099 /* If we cannot assume that the exit is taken eventually, record the
1100 assumptions for divisibility of c. */
1101 assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
1102 assumption = fold_build2 (EQ_EXPR, boolean_type_node,
1103 assumption, build_int_cst (niter_type, 0));
1104 if (!integer_nonzerop (assumption))
1105 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1106 niter->assumptions, assumption);
1109 c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
1110 tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
1111 niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
1112 return true;
/* Checks whether we can determine the final value of the control variable
   of the loop with ending condition IV0 < IV1 (computed in TYPE).
   DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
   of the step.  The assumptions necessary to ensure that the computation
   of the final value does not overflow are recorded in NITER.  If we
   find the final value, we adjust DELTA and return TRUE.  Otherwise
   we return false.  BNDS bounds the value of IV1->base - IV0->base,
   and will be updated by the same amount as DELTA.  EXIT_MUST_BE_TAKEN is
   true if we know that the exit must be taken eventually.  */

static bool
number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
			       struct tree_niter_desc *niter,
			       tree *delta, tree step,
			       bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = TREE_TYPE (step);
  /* MOD is how much DELTA falls short of the next multiple of STEP; the
     final bound is shifted by this amount so that the exit test can be
     turned into an exact != comparison.  */
  tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
  tree tmod;
  tree assumption = boolean_true_node, bound;
  /* Pointer arithmetic must be carried out in sizetype.  */
  tree type1 = (POINTER_TYPE_P (type)) ? sizetype : type;

  /* Only a compile-time constant adjustment can be handled here.  */
  if (TREE_CODE (mod) != INTEGER_CST)
    return false;
  if (integer_nonzerop (mod))
    mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
  tmod = fold_convert (type1, mod);

  /* If the induction variable does not overflow and the exit is taken,
     then the computation of the final value does not overflow.  There
     are three cases:
       1) The case if the new final value is equal to the current one.
       2) Induction variable has pointer type, as the code cannot rely
	  on the object to that the pointer points being placed at the
	  end of the address space (and more pragmatically,
	  TYPE_{MIN,MAX}_VALUE is not defined for pointers).
       3) EXIT_MUST_BE_TAKEN is true, note it implies that the induction
	  variable does not overflow.  */
  if (!integer_zerop (mod) && !POINTER_TYPE_P (type) && !exit_must_be_taken)
    {
      if (integer_nonzerop (iv0->step))
	{
	  /* The final value of the iv is iv1->base + MOD, assuming
	     that this computation does not overflow, and that
	     iv0->base <= iv1->base + MOD.  */
	  bound = fold_build2 (MINUS_EXPR, type1,
			       TYPE_MAX_VALUE (type1), tmod);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	}
      else
	{
	  /* The final value of the iv is iv0->base - MOD, assuming
	     that this computation does not overflow, and that
	     iv0->base - MOD <= iv1->base.  */
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type1), tmod);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	}
      /* A statically false assumption means the loop state is
	 inconsistent; give up.  A non-constant one is recorded for the
	 caller to verify at run time.  */
      if (integer_zerop (assumption))
	return false;
      else if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  /* Since we are transforming LT to NE and DELTA is constant, there
     is no need to compute may_be_zero because this loop must roll.  */

  /* Keep BNDS in sync with the adjusted DELTA.  */
  bounds_add (bnds, wi::to_widest (mod), type);
  *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);
  return true;
}
/* Add assertions to NITER that ensure that the control variable of the loop
   with ending condition IV0 < IV1 does not overflow.  Types of IV0 and IV1
   are TYPE.  Returns false if we can prove that there is an overflow, true
   otherwise.  STEP is the absolute value of the step.  On success, also
   marks both ivs as no_overflow, since the recorded assumptions guarantee
   it.  */

static bool
assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		       struct tree_niter_desc *niter, tree step)
{
  tree bound, d, assumption, diff;
  tree niter_type = TREE_TYPE (step);

  if (integer_nonzerop (iv0->step))
    {
      /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
      if (iv0->no_overflow)
	return true;

      /* If iv0->base is a constant, we can determine the last value before
	 overflow precisely; otherwise we conservatively assume
	 MAX - STEP + 1.  */

      if (TREE_CODE (iv0->base) == INTEGER_CST)
	{
	  /* The distance to TYPE_MAX modulo STEP gives the exact slack
	     before the last in-range iv value.  */
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, TYPE_MAX_VALUE (type)),
			   fold_convert (niter_type, iv0->base));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      /* The iv stays in range as long as iv1->base <= MAX - diff.  */
      bound = fold_build2 (MINUS_EXPR, type,
			   TYPE_MAX_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (LE_EXPR, boolean_type_node,
				iv1->base, bound);
    }
  else
    {
      /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
      if (iv1->no_overflow)
	return true;

      if (TREE_CODE (iv1->base) == INTEGER_CST)
	{
	  /* Mirror case: the distance from TYPE_MIN modulo STEP.  */
	  d = fold_build2 (MINUS_EXPR, niter_type,
			   fold_convert (niter_type, iv1->base),
			   fold_convert (niter_type, TYPE_MIN_VALUE (type)));
	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
	}
      else
	diff = fold_build2 (MINUS_EXPR, niter_type, step,
			    build_int_cst (niter_type, 1));
      bound = fold_build2 (PLUS_EXPR, type,
			   TYPE_MIN_VALUE (type), fold_convert (type, diff));
      assumption = fold_build2 (GE_EXPR, boolean_type_node,
				iv0->base, bound);
    }

  /* A statically false assumption proves an overflow; otherwise record
     any non-trivial assumption for run-time verification.  */
  if (integer_zerop (assumption))
    return false;
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);

  iv0->no_overflow = true;
  iv1->no_overflow = true;
  return true;
}
/* Add an assumption to NITER that a loop whose ending condition
   is IV0 < IV1 rolls.  TYPE is the type of the control iv.  BNDS
   bounds the value of IV1->base - IV0->base.  May update
   NITER->assumptions and NITER->may_be_zero; returns nothing.  */

static void
assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
		      struct tree_niter_desc *niter, bounds *bnds)
{
  tree assumption = boolean_true_node, bound, diff;
  tree mbz, mbzl, mbzr, type1;
  bool rolls_p, no_overflow_p;
  widest_int dstep;
  mpz_t mstep, max;

  /* We are going to compute the number of iterations as
     (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
     variant of TYPE.  This formula only works if

     -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1

     (where MAX is the maximum value of the unsigned variant of TYPE, and
     the computations in this formula are performed in full precision,
     i.e., without overflows).

     Usually, for loops with exit condition iv0->base + step * i < iv1->base,
     we have a condition of the form iv0->base - step < iv1->base before the
     loop, and for loops iv0->base < iv1->base - step * i the condition
     iv0->base < iv1->base + step, due to loop header copying, which enable us
     to prove the lower bound.

     The upper bound is more complicated.  Unless the expressions for initial
     and final value themselves contain enough information, we usually cannot
     derive it from the context.  */

  /* First check whether the answer does not follow from the bounds we
     gathered before.  DSTEP is the (positive) magnitude of the step.  */
  if (integer_nonzerop (iv0->step))
    dstep = wi::to_widest (iv0->step);
  else
    {
      /* IV1 steps downward; negate to get the magnitude.  */
      dstep = wi::sext (wi::to_widest (iv1->step), TYPE_PRECISION (type));
      dstep = -dstep;
    }

  /* MSTEP := -step + 1, the lower bound required by the formula above.  */
  mpz_init (mstep);
  wi::to_mpz (dstep, mstep, UNSIGNED);
  mpz_neg (mstep, mstep);
  mpz_add_ui (mstep, mstep, 1);

  rolls_p = mpz_cmp (mstep, bnds->below) <= 0;

  /* MAX := 2^precision - 1 + (-step + 1), the upper bound.  */
  mpz_init (max);
  wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), max, UNSIGNED);
  mpz_add (max, max, mstep);
  no_overflow_p = (mpz_cmp (bnds->up, max) <= 0
		   /* For pointers, only values lying inside a single object
		      can be compared or manipulated by pointer arithmetics.
		      Gcc in general does not allow or handle objects larger
		      than half of the address space, hence the upper bound
		      is satisfied for pointers.  */
		   || POINTER_TYPE_P (type));
  mpz_clear (mstep);
  mpz_clear (max);

  /* Both conditions already proven; nothing needs recording.  */
  if (rolls_p && no_overflow_p)
    return;

  type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Now the hard part; we must formulate the assumption(s) as expressions, and
     we must be careful not to introduce overflow.  */

  if (integer_nonzerop (iv0->step))
    {
      diff = fold_build2 (MINUS_EXPR, type1,
			  iv0->step, build_int_cst (type1, 1));

      /* We need to know that iv0->base >= MIN + iv0->step - 1.  Since
	 0 address never belongs to any object, we can assume this for
	 pointers.  */
      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MIN_VALUE (type), diff);
	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
				    iv0->base, bound);
	}

      /* And then we can compute iv0->base - diff, and compare it with
	 iv1->base.  */
      mbzl = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv0->base), diff);
      mbzr = fold_convert (type1, iv1->base);
    }
  else
    {
      /* Downward-counting variant: IV1->step is negative, so DIFF is
	 step magnitude minus one expressed as iv1->step + 1.  */
      diff = fold_build2 (PLUS_EXPR, type1,
			  iv1->step, build_int_cst (type1, 1));

      if (!POINTER_TYPE_P (type))
	{
	  bound = fold_build2 (PLUS_EXPR, type1,
			       TYPE_MAX_VALUE (type), diff);
	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
				    iv1->base, bound);
	}

      mbzl = fold_convert (type1, iv0->base);
      mbzr = fold_build2 (MINUS_EXPR, type1,
			  fold_convert (type1, iv1->base), diff);
    }

  /* Record the non-trivial overflow assumption, and (unless the loop is
     already known to roll) the "may be zero" condition MBZL > MBZR.  */
  if (!integer_nonzerop (assumption))
    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				      niter->assumptions, assumption);
  if (!rolls_p)
    {
      mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
      niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
					niter->may_be_zero, mbz);
    }
}
/* Determines number of iterations of loop whose ending condition
   is IV0 < IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  BNDS bounds the difference
   IV1->base - IV0->base.  EXIT_MUST_BE_TAKEN is true if we know
   that the exit must be taken eventually.  Returns true on success.  */

static bool
number_of_iterations_lt (struct loop *loop, tree type, affine_iv *iv0,
			 affine_iv *iv1, struct tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree niter_type = unsigned_type_for (type);
  tree delta, step, s;
  mpz_t mstep, tmp;

  /* Exactly one of the ivs steps (the caller guarantees this); the
     stepping one is the control iv, the other supplies the bound.  */
  if (integer_nonzerop (iv0->step))
    {
      niter->control = *iv0;
      niter->cmp = LT_EXPR;
      niter->bound = iv1->base;
    }
  else
    {
      niter->control = *iv1;
      niter->cmp = GT_EXPR;
      niter->bound = iv0->base;
    }

  /* Compute the base difference in the unsigned type so the subtraction
     cannot overflow.  */
  delta = fold_build2 (MINUS_EXPR, niter_type,
		       fold_convert (niter_type, iv1->base),
		       fold_convert (niter_type, iv0->base));

  /* First handle the special case that the step is +-1.  */
  if ((integer_onep (iv0->step) && integer_zerop (iv1->step))
      || (integer_all_onesp (iv1->step) && integer_zerop (iv0->step)))
    {
      /* for (i = iv0->base; i < iv1->base; i++)

	 or

	 for (i = iv1->base; i > iv0->base; i--).

	 In both cases # of iterations is iv1->base - iv0->base, assuming that
	 iv1->base >= iv0->base.

	 First try to derive a lower bound on the value of
	 iv1->base - iv0->base, computed in full precision.  If the difference
	 is nonnegative, we are done, otherwise we must record the
	 condition.  */

      if (mpz_sgn (bnds->below) < 0)
	niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
					  iv1->base, iv0->base);
      niter->niter = delta;
      niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
				     TYPE_SIGN (niter_type));
      niter->control.no_overflow = true;
      return true;
    }

  /* STEP is the magnitude of the step, always positive here.  */
  if (integer_nonzerop (iv0->step))
    step = fold_convert (niter_type, iv0->step);
  else
    step = fold_convert (niter_type,
			 fold_build1 (NEGATE_EXPR, type, iv1->step));

  /* If we can determine the final value of the control iv exactly, we can
     transform the condition to != comparison.  In particular, this will be
     the case if DELTA is constant.  */
  if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, &delta, step,
				     exit_must_be_taken, bnds))
    {
      affine_iv zps;

      /* Reduce to counting iterations of a zero-based iv with step STEP
	 reaching exactly DELTA.  */
      zps.base = build_int_cst (niter_type, 0);
      zps.step = step;
      /* number_of_iterations_lt_to_ne will add assumptions that ensure that
	 zps does not overflow.  */
      zps.no_overflow = true;

      return number_of_iterations_ne (loop, type, &zps,
				      delta, niter, true, bnds);
    }

  /* Make sure that the control iv does not overflow.  */
  if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
    return false;

  /* We determine the number of iterations as (delta + step - 1) / step.  For
     this to work, we must know that iv1->base >= iv0->base - step + 1,
     otherwise the loop does not roll.  */
  assert_loop_rolls_lt (type, iv0, iv1, niter, bnds);

  s = fold_build2 (MINUS_EXPR, niter_type,
		   step, build_int_cst (niter_type, 1));
  delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);

  /* Derive the upper bound on the iteration count as
     (bnds->up + step - 1) / step, computed in unbounded precision.  */
  mpz_init (mstep);
  mpz_init (tmp);
  wi::to_mpz (step, mstep, UNSIGNED);
  mpz_add (tmp, bnds->up, mstep);
  mpz_sub_ui (tmp, tmp, 1);
  mpz_fdiv_q (tmp, tmp, mstep);
  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
				 TYPE_SIGN (niter_type));
  mpz_clear (mstep);
  mpz_clear (tmp);

  return true;
}
/* Determines number of iterations of loop whose ending condition
   is IV0 <= IV1.  TYPE is the type of the iv.  The number of
   iterations is stored to NITER.  EXIT_MUST_BE_TAKEN is true if
   we know that this condition must eventually become false (we derived this
   earlier, and possibly set NITER->assumptions to make sure this
   is the case).  BNDS bounds the difference IV1->base - IV0->base.
   Reduces the problem to the IV0 < IV1 case by shifting one bound
   by 1.  */

static bool
number_of_iterations_le (struct loop *loop, tree type, affine_iv *iv0,
			 affine_iv *iv1, struct tree_niter_desc *niter,
			 bool exit_must_be_taken, bounds *bnds)
{
  tree assumption;
  tree type1 = type;
  if (POINTER_TYPE_P (type))
    type1 = sizetype;

  /* Say that IV0 is the control variable.  Then IV0 <= IV1 iff
     IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
     value of the type.  This we must know anyway, since if it is
     equal to this value, the loop rolls forever.  We do not check
     this condition for pointer type ivs, as the code cannot rely on
     the object to that the pointer points being placed at the end of
     the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
     not defined for pointers).  */

  if (!exit_must_be_taken && !POINTER_TYPE_P (type))
    {
      if (integer_nonzerop (iv0->step))
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv1->base, TYPE_MAX_VALUE (type));
      else
	assumption = fold_build2 (NE_EXPR, boolean_type_node,
				  iv0->base, TYPE_MIN_VALUE (type));

      if (integer_zerop (assumption))
	return false;
      if (!integer_nonzerop (assumption))
	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
					  niter->assumptions, assumption);
    }

  /* Shift the non-stepping bound by one so that <= becomes <; use the
     pointer-plus builder for pointer ivs.  */
  if (integer_nonzerop (iv0->step))
    {
      if (POINTER_TYPE_P (type))
	iv1->base = fold_build_pointer_plus_hwi (iv1->base, 1);
      else
	iv1->base = fold_build2 (PLUS_EXPR, type1, iv1->base,
				 build_int_cst (type1, 1));
    }
  else if (POINTER_TYPE_P (type))
    iv0->base = fold_build_pointer_plus_hwi (iv0->base, -1);
  else
    iv0->base = fold_build2 (MINUS_EXPR, type1,
			     iv0->base, build_int_cst (type1, 1));

  /* The difference of the bases grew by one; keep BNDS in sync.  */
  bounds_add (bnds, 1, type1);

  return number_of_iterations_lt (loop, type, iv0, iv1, niter, exit_must_be_taken,
				  bnds);
}
1559 /* Dumps description of affine induction variable IV to FILE. */
1561 static void
1562 dump_affine_iv (FILE *file, affine_iv *iv)
1564 if (!integer_zerop (iv->step))
1565 fprintf (file, "[");
1567 print_generic_expr (dump_file, iv->base, TDF_SLIM);
1569 if (!integer_zerop (iv->step))
1571 fprintf (file, ", + , ");
1572 print_generic_expr (dump_file, iv->step, TDF_SLIM);
1573 fprintf (file, "]%s", iv->no_overflow ? "(no_overflow)" : "");
/* Determine the number of iterations according to condition (for staying
   inside loop) which compares two induction variables using comparison
   operator CODE.  The induction variable on left side of the comparison
   is IV0, the right-hand side is IV1.  Both induction variables must have
   type TYPE, which must be an integer or pointer type.  The steps of the
   ivs must be constants (or NULL_TREE, which is interpreted as constant zero).

   LOOP is the loop whose number of iterations we are determining.

   ONLY_EXIT is true if we are sure this is the only way the loop could be
   exited (including possibly non-returning function calls, exceptions, etc.)
   -- in this case we can use the information whether the control induction
   variables can overflow or not in a more efficient way.

   if EVERY_ITERATION is true, we know the test is executed on every iteration.

   The results (number of iterations and assumptions as described in
   comments at struct tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
   Returns false if it fails to determine number of iterations, true if it
   was determined (possibly with some assumptions).  */

static bool
number_of_iterations_cond (struct loop *loop,
			   tree type, affine_iv *iv0, enum tree_code code,
			   affine_iv *iv1, struct tree_niter_desc *niter,
			   bool only_exit, bool every_iteration)
{
  bool exit_must_be_taken = false, ret;
  bounds bnds;

  /* If the test is not executed every iteration, wrapping may make the test
     to pass again.
     TODO: the overflow case can be still used as unreliable estimate of upper
     bound.  But we have no API to pass it down to number of iterations code
     and, at present, it will not use it anyway.  */
  if (!every_iteration
      && (!iv0->no_overflow || !iv1->no_overflow
	  || code == NE_EXPR || code == EQ_EXPR))
    return false;

  /* The meaning of these assumptions is this:
     if !assumptions
       then the rest of information does not have to be valid
     if may_be_zero then the loop does not roll, even if
       niter != 0.  */
  niter->assumptions = boolean_true_node;
  niter->may_be_zero = boolean_false_node;
  niter->niter = NULL_TREE;
  niter->max = 0;
  niter->bound = NULL_TREE;
  niter->cmp = ERROR_MARK;

  /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
     the control variable is on lhs.  */
  if (code == GE_EXPR || code == GT_EXPR
      || (code == NE_EXPR && integer_zerop (iv0->step)))
    {
      std::swap (iv0, iv1);
      code = swap_tree_comparison (code);
    }

  if (POINTER_TYPE_P (type))
    {
      /* Comparison of pointers is undefined unless both iv0 and iv1 point
	 to the same object.  If they do, the control variable cannot wrap
	 (as wrap around the bounds of memory will never return a pointer
	 that would be guaranteed to point to the same object, even if we
	 avoid undefined behavior by casting to size_t and back).  */
      iv0->no_overflow = true;
      iv1->no_overflow = true;
    }

  /* If the control induction variable does not overflow and the only exit
     from the loop is the one that we analyze, we know it must be taken
     eventually.  */
  if (only_exit)
    {
      if (!integer_zerop (iv0->step) && iv0->no_overflow)
	exit_must_be_taken = true;
      else if (!integer_zerop (iv1->step) && iv1->no_overflow)
	exit_must_be_taken = true;
    }

  /* We can handle the case when neither of the sides of the comparison is
     invariant, provided that the test is NE_EXPR.  This rarely occurs in
     practice, but it is simple enough to manage.  */
  if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
      if (code != NE_EXPR)
	return false;

      /* Fold the two steps into one: i0 + s0 != i1 + s1 iff
	 i0 + (s0 - s1) != i1 with i1 now invariant.  */
      iv0->step = fold_binary_to_constant (MINUS_EXPR, step_type,
					   iv0->step, iv1->step);
      iv0->no_overflow = false;
      iv1->step = build_int_cst (step_type, 0);
      iv1->no_overflow = true;
    }

  /* If the result of the comparison is a constant,  the loop is weird.  More
     precise handling would be possible, but the situation is not common enough
     to waste time on it.  */
  if (integer_zerop (iv0->step) && integer_zerop (iv1->step))
    return false;

  /* Ignore loops of while (i-- < 10) type.  */
  if (code != NE_EXPR)
    {
      if (iv0->step && tree_int_cst_sign_bit (iv0->step))
	return false;

      if (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))
	return false;
    }

  /* If the loop exits immediately, there is nothing to do.  */
  tree tem = fold_binary (code, boolean_type_node, iv0->base, iv1->base);
  if (tem && integer_zerop (tem))
    {
      niter->niter = build_int_cst (unsigned_type_for (type), 0);
      niter->max = 0;
      return true;
    }

  /* OK, now we know we have a senseful loop.  Handle several cases, depending
     on what comparison operator is used.  */
  bound_difference (loop, iv1->base, iv0->base, &bnds);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Analyzing # of iterations of loop %d\n", loop->num);

      fprintf (dump_file, "  exit condition ");
      dump_affine_iv (dump_file, iv0);
      fprintf (dump_file, " %s ",
	       code == NE_EXPR ? "!="
	       : code == LT_EXPR ? "<"
	       : "<=");
      dump_affine_iv (dump_file, iv1);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "  bounds on difference of bases: ");
      mpz_out_str (dump_file, 10, bnds.below);
      fprintf (dump_file, " ... ");
      mpz_out_str (dump_file, 10, bnds.up);
      fprintf (dump_file, "\n");
    }

  /* After the canonicalization above only NE/LT/LE remain possible.  */
  switch (code)
    {
    case NE_EXPR:
      gcc_assert (integer_zerop (iv1->step));
      ret = number_of_iterations_ne (loop, type, iv0, iv1->base, niter,
				     exit_must_be_taken, &bnds);
      break;

    case LT_EXPR:
      ret = number_of_iterations_lt (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, &bnds);
      break;

    case LE_EXPR:
      ret = number_of_iterations_le (loop, type, iv0, iv1, niter,
				     exit_must_be_taken, &bnds);
      break;

    default:
      gcc_unreachable ();
    }

  mpz_clear (bnds.up);
  mpz_clear (bnds.below);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (ret)
	{
	  fprintf (dump_file, "  result:\n");
	  if (!integer_nonzerop (niter->assumptions))
	    {
	      fprintf (dump_file, "    under assumptions ");
	      print_generic_expr (dump_file, niter->assumptions, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }

	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "    zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "    # of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, ", bounded by ");
	  print_decu (niter->max, dump_file);
	  fprintf (dump_file, "\n");
	}
      else
	fprintf (dump_file, "  failed\n\n");
    }
  return ret;
}
1782 /* Substitute NEW for OLD in EXPR and fold the result. */
1784 static tree
1785 simplify_replace_tree (tree expr, tree old, tree new_tree)
1787 unsigned i, n;
1788 tree ret = NULL_TREE, e, se;
1790 if (!expr)
1791 return NULL_TREE;
1793 /* Do not bother to replace constants. */
1794 if (CONSTANT_CLASS_P (old))
1795 return expr;
1797 if (expr == old
1798 || operand_equal_p (expr, old, 0))
1799 return unshare_expr (new_tree);
1801 if (!EXPR_P (expr))
1802 return expr;
1804 n = TREE_OPERAND_LENGTH (expr);
1805 for (i = 0; i < n; i++)
1807 e = TREE_OPERAND (expr, i);
1808 se = simplify_replace_tree (e, old, new_tree);
1809 if (e == se)
1810 continue;
1812 if (!ret)
1813 ret = copy_node (expr);
1815 TREE_OPERAND (ret, i) = se;
1818 return (ret ? fold (ret) : expr);
/* Expand definitions of ssa names in EXPR as long as they are simple
   enough, and return the new expression.  If STOP is specified, stop
   expanding if EXPR equals to it.  "Simple" means single-argument phis,
   copies, casts, and additions/subtractions of an invariant.  */

tree
expand_simple_operations (tree expr, tree stop)
{
  unsigned i, n;
  tree ret = NULL_TREE, e, ee, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return expr;

  if (is_gimple_min_invariant (expr))
    return expr;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      /* A compound expression: expand each operand, copying EXPR lazily
	 on the first change, then re-fold the result.  */
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  e = TREE_OPERAND (expr, i);
	  ee = expand_simple_operations (e, stop);
	  if (e == ee)
	    continue;

	  if (!ret)
	    ret = copy_node (expr);

	  TREE_OPERAND (ret, i) = ee;
	}

      if (!ret)
	return expr;

      /* Folding the expanded form must not emit overflow warnings for
	 code the user never wrote.  */
      fold_defer_overflow_warnings ();
      ret = fold (ret);
      fold_undefer_and_ignore_overflow_warnings ();
      return ret;
    }

  /* Stop if it's not ssa name or the one we don't want to expand.  */
  if (TREE_CODE (expr) != SSA_NAME || expr == stop)
    return expr;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      basic_block src, dest;

      /* Only degenerate (single-argument) phis are copies.  */
      if (gimple_phi_num_args (stmt) != 1)
	return expr;
      e = PHI_ARG_DEF (stmt, 0);

      /* Avoid propagating through loop exit phi nodes, which
	 could break loop-closed SSA form restrictions.  */
      dest = gimple_bb (stmt);
      src = single_pred (dest);
      if (TREE_CODE (e) == SSA_NAME
	  && src->loop_father != dest->loop_father)
	return expr;

      return expand_simple_operations (e, stop);
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return expr;

  /* Avoid expanding to expressions that contain SSA names that need
     to take part in abnormal coalescing.  */
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (e, stmt, iter, SSA_OP_USE)
    if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e))
      return expr;

  e = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (is_gimple_min_invariant (e))
	return e;

      if (code == SSA_NAME)
	return expand_simple_operations (e, stop);

      return expr;
    }

  switch (code)
    {
    CASE_CONVERT:
      /* Casts are simple.  */
      ee = expand_simple_operations (e, stop);
      return fold_build1 (code, TREE_TYPE (expr), ee);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Do not expand arithmetic whose overflow traps (-ftrapv style
	 types): re-folding it could change where the trap happens.  */
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr))
	  && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr)))
	return expr;
      /* Fallthru.  */
    case POINTER_PLUS_EXPR:
      /* And increments and decrements by a constant are simple.  */
      e1 = gimple_assign_rhs2 (stmt);
      if (!is_gimple_min_invariant (e1))
	return expr;

      ee = expand_simple_operations (e, stop);
      return fold_build2 (code, TREE_TYPE (expr), ee, e1);

    default:
      return expr;
    }
}
/* Tries to simplify EXPR using the condition COND.  Returns the simplified
   expression (or EXPR unchanged, if no simplification was possible).
   STOP is passed through to expand_simple_operations to limit expansion.  */

static tree
tree_simplify_using_condition_1 (tree cond, tree expr, tree stop)
{
  bool changed;
  tree e, te, e0, e1, e2, notcond;
  enum tree_code code = TREE_CODE (expr);

  if (code == INTEGER_CST)
    return expr;

  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      /* Simplify each operand independently and rebuild the expression
	 only when some operand actually changed.  */
      changed = false;

      e0 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 0), stop);
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 1), stop);
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = tree_simplify_using_condition_1 (cond, TREE_OPERAND (expr, 2), stop);
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* In case COND is equality, we may be able to simplify EXPR by copy/constant
     propagation, and vice versa.  Fold does not handle this, since it is
     considered too expensive.  */
  if (TREE_CODE (cond) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (cond, 0);
      e1 = TREE_OPERAND (cond, 1);

      /* We know that e0 == e1.  Check whether we cannot simplify expr
	 using this fact.  Only accept the substitution when it collapses
	 EXPR to a constant.  */
      e = simplify_replace_tree (expr, e0, e1);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;

      e = simplify_replace_tree (expr, e1, e0);
      if (integer_zerop (e) || integer_nonzerop (e))
	return e;
    }
  if (TREE_CODE (expr) == EQ_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true.  */
      e = simplify_replace_tree (cond, e0, e1);
      if (integer_zerop (e))
	return e;
      e = simplify_replace_tree (cond, e1, e0);
      if (integer_zerop (e))
	return e;
    }
  if (TREE_CODE (expr) == NE_EXPR)
    {
      e0 = TREE_OPERAND (expr, 0);
      e1 = TREE_OPERAND (expr, 1);

      /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true.  */
      e = simplify_replace_tree (cond, e0, e1);
      if (integer_zerop (e))
	return boolean_true_node;
      e = simplify_replace_tree (cond, e1, e0);
      if (integer_zerop (e))
	return boolean_true_node;
    }

  te = expand_simple_operations (expr, stop);

  /* Check whether COND ==> EXPR.  */
  notcond = invert_truthvalue (cond);
  e = fold_binary (TRUTH_OR_EXPR, boolean_type_node, notcond, te);
  if (e && integer_nonzerop (e))
    return e;

  /* Check whether COND ==> not EXPR.  */
  e = fold_binary (TRUTH_AND_EXPR, boolean_type_node, cond, te);
  if (e && integer_zerop (e))
    return e;

  return expr;
}
2046 /* Tries to simplify EXPR using the condition COND. Returns the simplified
2047 expression (or EXPR unchanged, if no simplification was possible).
2048 Wrapper around tree_simplify_using_condition_1 that ensures that chains
2049 of simple operations in definitions of ssa names in COND are expanded,
2050 so that things like casts or incrementing the value of the bound before
2051 the loop do not cause us to fail. */
2053 static tree
2054 tree_simplify_using_condition (tree cond, tree expr, tree stop)
2056 cond = expand_simple_operations (cond, stop);
2058 return tree_simplify_using_condition_1 (cond, expr, stop);
/* Tries to simplify EXPR using the conditions on entry to LOOP.
   Returns the simplified expression (or EXPR unchanged, if no
   simplification was possible).  STOP limits ssa-name expansion inside
   the simplifier.  Walks at most MAX_DOMINATORS_TO_WALK dominator blocks
   above the loop header, using the controlling condition of each
   single-predecessor block it passes.  */

tree
simplify_using_initial_conditions (struct loop *loop, tree expr, tree stop)
{
  edge e;
  basic_block bb;
  gimple *stmt;
  tree cond;
  int cnt = 0;

  if (TREE_CODE (expr) == INTEGER_CST)
    return expr;

  /* Limit walking the dominators to avoid quadraticness in
     the number of BBs times the number of loops in degenerate
     cases.  */
  for (bb = loop->header;
       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && cnt < MAX_DOMINATORS_TO_WALK;
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      /* Only blocks entered by a single conditional edge carry a usable
	 condition; note that skipped blocks do not count towards CNT.  */
      if (!single_pred_p (bb))
	continue;
      e = single_pred_edge (bb);

      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* Rebuild the condition that is known to hold on edge E; a
	 TRUE/FALSE-flagged edge implies its source ends in a GIMPLE_COND.  */
      stmt = last_stmt (e->src);
      cond = fold_build2 (gimple_cond_code (stmt),
			  boolean_type_node,
			  gimple_cond_lhs (stmt),
			  gimple_cond_rhs (stmt));
      if (e->flags & EDGE_FALSE_VALUE)
	cond = invert_truthvalue (cond);
      expr = tree_simplify_using_condition (cond, expr, stop);
      /* Break if EXPR is simplified to const values.  */
      if (expr && (integer_zerop (expr) || integer_nonzerop (expr)))
	break;

      ++cnt;
    }

  return expr;
}
/* Tries to simplify EXPR using the evolutions of the loop invariants
   in the superloops of LOOP.  Returns the simplified expression
   (or EXPR unchanged, if no simplification was possible).  */

static tree
simplify_using_outer_evolutions (struct loop *loop, tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  bool changed;
  tree e, e0, e1, e2;

  if (is_gimple_min_invariant (expr))
    return expr;

  if (code == TRUTH_OR_EXPR
      || code == TRUTH_AND_EXPR
      || code == COND_EXPR)
    {
      /* Recurse into the boolean structure, rebuilding the expression only
	 when some operand was actually simplified.  */
      changed = false;

      e0 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 0));
      if (TREE_OPERAND (expr, 0) != e0)
	changed = true;

      e1 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 1));
      if (TREE_OPERAND (expr, 1) != e1)
	changed = true;

      if (code == COND_EXPR)
	{
	  e2 = simplify_using_outer_evolutions (loop, TREE_OPERAND (expr, 2));
	  if (TREE_OPERAND (expr, 2) != e2)
	    changed = true;
	}
      else
	e2 = NULL_TREE;

      if (changed)
	{
	  if (code == COND_EXPR)
	    expr = fold_build3 (code, boolean_type_node, e0, e1, e2);
	  else
	    expr = fold_build2 (code, boolean_type_node, e0, e1);
	}

      return expr;
    }

  /* For a leaf, instantiate the symbolic parameters; accept the result
     only if it became invariant.  */
  e = instantiate_parameters (loop, expr);
  if (is_gimple_min_invariant (e))
    return e;

  return expr;
}
2164 /* Returns true if EXIT is the only possible exit from LOOP. */
2166 bool
2167 loop_only_exit_p (const struct loop *loop, const_edge exit)
2169 basic_block *body;
2170 gimple_stmt_iterator bsi;
2171 unsigned i;
2173 if (exit != single_exit (loop))
2174 return false;
2176 body = get_loop_body (loop);
2177 for (i = 0; i < loop->num_nodes; i++)
2179 for (bsi = gsi_start_bb (body[i]); !gsi_end_p (bsi); gsi_next (&bsi))
2180 if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
2182 free (body);
2183 return true;
2187 free (body);
2188 return true;
2191 /* Stores description of number of iterations of LOOP derived from
2192 EXIT (an exit edge of the LOOP) in NITER. Returns true if some useful
2193 information could be derived (and fields of NITER have meaning described
2194 in comments at struct tree_niter_desc declaration), false otherwise.
2195 When EVERY_ITERATION is true, only tests that are known to be executed
2196 every iteration are considered (i.e. only test that alone bounds the loop).
2197 If AT_STMT is not NULL, this function stores LOOP's condition statement in
2198 it when returning true. */
2200 bool
2201 number_of_iterations_exit_assumptions (struct loop *loop, edge exit,
2202 struct tree_niter_desc *niter,
2203 gcond **at_stmt, bool every_iteration)
2205 gimple *last;
2206 gcond *stmt;
2207 tree type;
2208 tree op0, op1;
2209 enum tree_code code;
2210 affine_iv iv0, iv1;
2211 bool safe;
2213 /* Nothing to analyze if the loop is known to be infinite. */
2214 if (loop_constraint_set_p (loop, LOOP_C_INFINITE))
2215 return false;
2217 safe = dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src);
2219 if (every_iteration && !safe)
2220 return false;
2222 niter->assumptions = boolean_false_node;
2223 niter->control.base = NULL_TREE;
2224 niter->control.step = NULL_TREE;
2225 niter->control.no_overflow = false;
2226 last = last_stmt (exit->src);
2227 if (!last)
2228 return false;
2229 stmt = dyn_cast <gcond *> (last);
2230 if (!stmt)
2231 return false;
2233 /* We want the condition for staying inside loop. */
2234 code = gimple_cond_code (stmt);
2235 if (exit->flags & EDGE_TRUE_VALUE)
2236 code = invert_tree_comparison (code, false);
2238 switch (code)
2240 case GT_EXPR:
2241 case GE_EXPR:
2242 case LT_EXPR:
2243 case LE_EXPR:
2244 case NE_EXPR:
2245 break;
2247 default:
2248 return false;
2251 op0 = gimple_cond_lhs (stmt);
2252 op1 = gimple_cond_rhs (stmt);
2253 type = TREE_TYPE (op0);
2255 if (TREE_CODE (type) != INTEGER_TYPE
2256 && !POINTER_TYPE_P (type))
2257 return false;
2259 tree iv0_niters = NULL_TREE;
2260 if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
2261 op0, &iv0, &iv0_niters, false))
2262 return false;
2263 tree iv1_niters = NULL_TREE;
2264 if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
2265 op1, &iv1, &iv1_niters, false))
2266 return false;
2267 /* Give up on complicated case. */
2268 if (iv0_niters && iv1_niters)
2269 return false;
2271 /* We don't want to see undefined signed overflow warnings while
2272 computing the number of iterations. */
2273 fold_defer_overflow_warnings ();
2275 iv0.base = expand_simple_operations (iv0.base);
2276 iv1.base = expand_simple_operations (iv1.base);
2277 if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
2278 loop_only_exit_p (loop, exit), safe))
2280 fold_undefer_and_ignore_overflow_warnings ();
2281 return false;
2284 /* Incorporate additional assumption implied by control iv. */
2285 tree iv_niters = iv0_niters ? iv0_niters : iv1_niters;
2286 if (iv_niters)
2288 tree assumption = fold_build2 (LE_EXPR, boolean_type_node, niter->niter,
2289 fold_convert (TREE_TYPE (niter->niter),
2290 iv_niters));
2292 if (!integer_nonzerop (assumption))
2293 niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2294 niter->assumptions, assumption);
2296 /* Refine upper bound if possible. */
2297 if (TREE_CODE (iv_niters) == INTEGER_CST
2298 && niter->max > wi::to_widest (iv_niters))
2299 niter->max = wi::to_widest (iv_niters);
2302 /* There is no assumptions if the loop is known to be finite. */
2303 if (!integer_zerop (niter->assumptions)
2304 && loop_constraint_set_p (loop, LOOP_C_FINITE))
2305 niter->assumptions = boolean_true_node;
2307 if (optimize >= 3)
2309 niter->assumptions = simplify_using_outer_evolutions (loop,
2310 niter->assumptions);
2311 niter->may_be_zero = simplify_using_outer_evolutions (loop,
2312 niter->may_be_zero);
2313 niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
2316 niter->assumptions
2317 = simplify_using_initial_conditions (loop,
2318 niter->assumptions);
2319 niter->may_be_zero
2320 = simplify_using_initial_conditions (loop,
2321 niter->may_be_zero);
2323 fold_undefer_and_ignore_overflow_warnings ();
2325 /* If NITER has simplified into a constant, update MAX. */
2326 if (TREE_CODE (niter->niter) == INTEGER_CST)
2327 niter->max = wi::to_widest (niter->niter);
2329 if (at_stmt)
2330 *at_stmt = stmt;
2332 return (!integer_zerop (niter->assumptions));
2335 /* Like number_of_iterations_exit, but return TRUE only if the niter
2336 information holds unconditionally. */
2338 bool
2339 number_of_iterations_exit (struct loop *loop, edge exit,
2340 struct tree_niter_desc *niter,
2341 bool warn, bool every_iteration)
2343 gcond *stmt;
2344 if (!number_of_iterations_exit_assumptions (loop, exit, niter,
2345 &stmt, every_iteration))
2346 return false;
2348 if (integer_nonzerop (niter->assumptions))
2349 return true;
2351 if (warn)
2353 const char *wording;
2355 wording = N_("missed loop optimization, the loop counter may overflow");
2356 warning_at (gimple_location_safe (stmt),
2357 OPT_Wunsafe_loop_optimizations, "%s", gettext (wording));
2360 return false;
2363 /* Try to determine the number of iterations of LOOP. If we succeed,
2364 expression giving number of iterations is returned and *EXIT is
2365 set to the edge from that the information is obtained. Otherwise
2366 chrec_dont_know is returned. */
2368 tree
2369 find_loop_niter (struct loop *loop, edge *exit)
2371 unsigned i;
2372 vec<edge> exits = get_loop_exit_edges (loop);
2373 edge ex;
2374 tree niter = NULL_TREE, aniter;
2375 struct tree_niter_desc desc;
2377 *exit = NULL;
2378 FOR_EACH_VEC_ELT (exits, i, ex)
2380 if (!number_of_iterations_exit (loop, ex, &desc, false))
2381 continue;
2383 if (integer_nonzerop (desc.may_be_zero))
2385 /* We exit in the first iteration through this exit.
2386 We won't find anything better. */
2387 niter = build_int_cst (unsigned_type_node, 0);
2388 *exit = ex;
2389 break;
2392 if (!integer_zerop (desc.may_be_zero))
2393 continue;
2395 aniter = desc.niter;
2397 if (!niter)
2399 /* Nothing recorded yet. */
2400 niter = aniter;
2401 *exit = ex;
2402 continue;
2405 /* Prefer constants, the lower the better. */
2406 if (TREE_CODE (aniter) != INTEGER_CST)
2407 continue;
2409 if (TREE_CODE (niter) != INTEGER_CST)
2411 niter = aniter;
2412 *exit = ex;
2413 continue;
2416 if (tree_int_cst_lt (aniter, niter))
2418 niter = aniter;
2419 *exit = ex;
2420 continue;
2423 exits.release ();
2425 return niter ? niter : chrec_dont_know;
2428 /* Return true if loop is known to have bounded number of iterations. */
2430 bool
2431 finite_loop_p (struct loop *loop)
2433 widest_int nit;
2434 int flags;
2436 flags = flags_from_decl_or_type (current_function_decl);
2437 if ((flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE))
2439 if (dump_file && (dump_flags & TDF_DETAILS))
2440 fprintf (dump_file, "Found loop %i to be finite: it is within pure or const function.\n",
2441 loop->num);
2442 return true;
2445 if (loop->any_upper_bound
2446 || max_loop_iterations (loop, &nit))
2448 if (dump_file && (dump_flags & TDF_DETAILS))
2449 fprintf (dump_file, "Found loop %i to be finite: upper bound found.\n",
2450 loop->num);
2451 return true;
2453 return false;
2458 Analysis of a number of iterations of a loop by a brute-force evaluation.
2462 /* Bound on the number of iterations we try to evaluate. */
2464 #define MAX_ITERATIONS_TO_TRACK \
2465 ((unsigned) PARAM_VALUE (PARAM_MAX_ITERATIONS_TO_TRACK))
2467 /* Returns the loop phi node of LOOP such that ssa name X is derived from its
2468 result by a chain of operations such that all but exactly one of their
2469 operands are constants. */
2471 static gphi *
2472 chain_of_csts_start (struct loop *loop, tree x)
2474 gimple *stmt = SSA_NAME_DEF_STMT (x);
2475 tree use;
2476 basic_block bb = gimple_bb (stmt);
2477 enum tree_code code;
2479 if (!bb
2480 || !flow_bb_inside_loop_p (loop, bb))
2481 return NULL;
2483 if (gimple_code (stmt) == GIMPLE_PHI)
2485 if (bb == loop->header)
2486 return as_a <gphi *> (stmt);
2488 return NULL;
2491 if (gimple_code (stmt) != GIMPLE_ASSIGN
2492 || gimple_assign_rhs_class (stmt) == GIMPLE_TERNARY_RHS)
2493 return NULL;
2495 code = gimple_assign_rhs_code (stmt);
2496 if (gimple_references_memory_p (stmt)
2497 || TREE_CODE_CLASS (code) == tcc_reference
2498 || (code == ADDR_EXPR
2499 && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt))))
2500 return NULL;
2502 use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2503 if (use == NULL_TREE)
2504 return NULL;
2506 return chain_of_csts_start (loop, use);
2509 /* Determines whether the expression X is derived from a result of a phi node
2510 in header of LOOP such that
2512 * the derivation of X consists only from operations with constants
2513 * the initial value of the phi node is constant
2514 * the value of the phi node in the next iteration can be derived from the
2515 value in the current iteration by a chain of operations with constants.
2517 If such phi node exists, it is returned, otherwise NULL is returned. */
2519 static gphi *
2520 get_base_for (struct loop *loop, tree x)
2522 gphi *phi;
2523 tree init, next;
2525 if (is_gimple_min_invariant (x))
2526 return NULL;
2528 phi = chain_of_csts_start (loop, x);
2529 if (!phi)
2530 return NULL;
2532 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2533 next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2535 if (TREE_CODE (next) != SSA_NAME)
2536 return NULL;
2538 if (!is_gimple_min_invariant (init))
2539 return NULL;
2541 if (chain_of_csts_start (loop, next) != phi)
2542 return NULL;
2544 return phi;
2547 /* Given an expression X, then
2549 * if X is NULL_TREE, we return the constant BASE.
2550 * otherwise X is a SSA name, whose value in the considered loop is derived
2551 by a chain of operations with constant from a result of a phi node in
2552 the header of the loop. Then we return value of X when the value of the
2553 result of this phi node is given by the constant BASE. */
2555 static tree
2556 get_val_for (tree x, tree base)
2558 gimple *stmt;
2560 gcc_checking_assert (is_gimple_min_invariant (base));
2562 if (!x)
2563 return base;
2565 stmt = SSA_NAME_DEF_STMT (x);
2566 if (gimple_code (stmt) == GIMPLE_PHI)
2567 return base;
2569 gcc_checking_assert (is_gimple_assign (stmt));
2571 /* STMT must be either an assignment of a single SSA name or an
2572 expression involving an SSA name and a constant. Try to fold that
2573 expression using the value for the SSA name. */
2574 if (gimple_assign_ssa_name_copy_p (stmt))
2575 return get_val_for (gimple_assign_rhs1 (stmt), base);
2576 else if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS
2577 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2579 return fold_build1 (gimple_assign_rhs_code (stmt),
2580 gimple_expr_type (stmt),
2581 get_val_for (gimple_assign_rhs1 (stmt), base));
2583 else if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
2585 tree rhs1 = gimple_assign_rhs1 (stmt);
2586 tree rhs2 = gimple_assign_rhs2 (stmt);
2587 if (TREE_CODE (rhs1) == SSA_NAME)
2588 rhs1 = get_val_for (rhs1, base);
2589 else if (TREE_CODE (rhs2) == SSA_NAME)
2590 rhs2 = get_val_for (rhs2, base);
2591 else
2592 gcc_unreachable ();
2593 return fold_build2 (gimple_assign_rhs_code (stmt),
2594 gimple_expr_type (stmt), rhs1, rhs2);
2596 else
2597 gcc_unreachable ();
2601 /* Tries to count the number of iterations of LOOP till it exits by EXIT
2602 by brute force -- i.e. by determining the value of the operands of the
2603 condition at EXIT in first few iterations of the loop (assuming that
2604 these values are constant) and determining the first one in that the
2605 condition is not satisfied. Returns the constant giving the number
2606 of the iterations of LOOP if successful, chrec_dont_know otherwise. */
2608 tree
2609 loop_niter_by_eval (struct loop *loop, edge exit)
2611 tree acnd;
2612 tree op[2], val[2], next[2], aval[2];
2613 gphi *phi;
2614 gimple *cond;
2615 unsigned i, j;
2616 enum tree_code cmp;
2618 cond = last_stmt (exit->src);
2619 if (!cond || gimple_code (cond) != GIMPLE_COND)
2620 return chrec_dont_know;
2622 cmp = gimple_cond_code (cond);
2623 if (exit->flags & EDGE_TRUE_VALUE)
2624 cmp = invert_tree_comparison (cmp, false);
2626 switch (cmp)
2628 case EQ_EXPR:
2629 case NE_EXPR:
2630 case GT_EXPR:
2631 case GE_EXPR:
2632 case LT_EXPR:
2633 case LE_EXPR:
2634 op[0] = gimple_cond_lhs (cond);
2635 op[1] = gimple_cond_rhs (cond);
2636 break;
2638 default:
2639 return chrec_dont_know;
2642 for (j = 0; j < 2; j++)
2644 if (is_gimple_min_invariant (op[j]))
2646 val[j] = op[j];
2647 next[j] = NULL_TREE;
2648 op[j] = NULL_TREE;
2650 else
2652 phi = get_base_for (loop, op[j]);
2653 if (!phi)
2654 return chrec_dont_know;
2655 val[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2656 next[j] = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2660 /* Don't issue signed overflow warnings. */
2661 fold_defer_overflow_warnings ();
2663 for (i = 0; i < MAX_ITERATIONS_TO_TRACK; i++)
2665 for (j = 0; j < 2; j++)
2666 aval[j] = get_val_for (op[j], val[j]);
2668 acnd = fold_binary (cmp, boolean_type_node, aval[0], aval[1]);
2669 if (acnd && integer_zerop (acnd))
2671 fold_undefer_and_ignore_overflow_warnings ();
2672 if (dump_file && (dump_flags & TDF_DETAILS))
2673 fprintf (dump_file,
2674 "Proved that loop %d iterates %d times using brute force.\n",
2675 loop->num, i);
2676 return build_int_cst (unsigned_type_node, i);
2679 for (j = 0; j < 2; j++)
2681 val[j] = get_val_for (next[j], val[j]);
2682 if (!is_gimple_min_invariant (val[j]))
2684 fold_undefer_and_ignore_overflow_warnings ();
2685 return chrec_dont_know;
2690 fold_undefer_and_ignore_overflow_warnings ();
2692 return chrec_dont_know;
2695 /* Finds the exit of the LOOP by that the loop exits after a constant
2696 number of iterations and stores the exit edge to *EXIT. The constant
2697 giving the number of iterations of LOOP is returned. The number of
2698 iterations is determined using loop_niter_by_eval (i.e. by brute force
2699 evaluation). If we are unable to find the exit for that loop_niter_by_eval
2700 determines the number of iterations, chrec_dont_know is returned. */
2702 tree
2703 find_loop_niter_by_eval (struct loop *loop, edge *exit)
2705 unsigned i;
2706 vec<edge> exits = get_loop_exit_edges (loop);
2707 edge ex;
2708 tree niter = NULL_TREE, aniter;
2710 *exit = NULL;
2712 /* Loops with multiple exits are expensive to handle and less important. */
2713 if (!flag_expensive_optimizations
2714 && exits.length () > 1)
2716 exits.release ();
2717 return chrec_dont_know;
2720 FOR_EACH_VEC_ELT (exits, i, ex)
2722 if (!just_once_each_iteration_p (loop, ex->src))
2723 continue;
2725 aniter = loop_niter_by_eval (loop, ex);
2726 if (chrec_contains_undetermined (aniter))
2727 continue;
2729 if (niter
2730 && !tree_int_cst_lt (aniter, niter))
2731 continue;
2733 niter = aniter;
2734 *exit = ex;
2736 exits.release ();
2738 return niter ? niter : chrec_dont_know;
2743 Analysis of upper bounds on number of iterations of a loop.
2747 static widest_int derive_constant_upper_bound_ops (tree, tree,
2748 enum tree_code, tree);
2750 /* Returns a constant upper bound on the value of the right-hand side of
2751 an assignment statement STMT. */
2753 static widest_int
2754 derive_constant_upper_bound_assign (gimple *stmt)
2756 enum tree_code code = gimple_assign_rhs_code (stmt);
2757 tree op0 = gimple_assign_rhs1 (stmt);
2758 tree op1 = gimple_assign_rhs2 (stmt);
2760 return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt)),
2761 op0, code, op1);
2764 /* Returns a constant upper bound on the value of expression VAL. VAL
2765 is considered to be unsigned. If its type is signed, its value must
2766 be nonnegative. */
2768 static widest_int
2769 derive_constant_upper_bound (tree val)
2771 enum tree_code code;
2772 tree op0, op1, op2;
2774 extract_ops_from_tree (val, &code, &op0, &op1, &op2);
2775 return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
2778 /* Returns a constant upper bound on the value of expression OP0 CODE OP1,
2779 whose type is TYPE. The expression is considered to be unsigned. If
2780 its type is signed, its value must be nonnegative. */
2782 static widest_int
2783 derive_constant_upper_bound_ops (tree type, tree op0,
2784 enum tree_code code, tree op1)
2786 tree subtype, maxt;
2787 widest_int bnd, max, cst;
2788 gimple *stmt;
2790 if (INTEGRAL_TYPE_P (type))
2791 maxt = TYPE_MAX_VALUE (type);
2792 else
2793 maxt = upper_bound_in_type (type, type);
2795 max = wi::to_widest (maxt);
2797 switch (code)
2799 case INTEGER_CST:
2800 return wi::to_widest (op0);
2802 CASE_CONVERT:
2803 subtype = TREE_TYPE (op0);
2804 if (!TYPE_UNSIGNED (subtype)
2805 /* If TYPE is also signed, the fact that VAL is nonnegative implies
2806 that OP0 is nonnegative. */
2807 && TYPE_UNSIGNED (type)
2808 && !tree_expr_nonnegative_p (op0))
2810 /* If we cannot prove that the casted expression is nonnegative,
2811 we cannot establish more useful upper bound than the precision
2812 of the type gives us. */
2813 return max;
2816 /* We now know that op0 is an nonnegative value. Try deriving an upper
2817 bound for it. */
2818 bnd = derive_constant_upper_bound (op0);
2820 /* If the bound does not fit in TYPE, max. value of TYPE could be
2821 attained. */
2822 if (wi::ltu_p (max, bnd))
2823 return max;
2825 return bnd;
2827 case PLUS_EXPR:
2828 case POINTER_PLUS_EXPR:
2829 case MINUS_EXPR:
2830 if (TREE_CODE (op1) != INTEGER_CST
2831 || !tree_expr_nonnegative_p (op0))
2832 return max;
2834 /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to
2835 choose the most logical way how to treat this constant regardless
2836 of the signedness of the type. */
2837 cst = wi::sext (wi::to_widest (op1), TYPE_PRECISION (type));
2838 if (code != MINUS_EXPR)
2839 cst = -cst;
2841 bnd = derive_constant_upper_bound (op0);
2843 if (wi::neg_p (cst))
2845 cst = -cst;
2846 /* Avoid CST == 0x80000... */
2847 if (wi::neg_p (cst))
2848 return max;
2850 /* OP0 + CST. We need to check that
2851 BND <= MAX (type) - CST. */
2853 widest_int mmax = max - cst;
2854 if (wi::leu_p (bnd, mmax))
2855 return max;
2857 return bnd + cst;
2859 else
2861 /* OP0 - CST, where CST >= 0.
2863 If TYPE is signed, we have already verified that OP0 >= 0, and we
2864 know that the result is nonnegative. This implies that
2865 VAL <= BND - CST.
2867 If TYPE is unsigned, we must additionally know that OP0 >= CST,
2868 otherwise the operation underflows.
2871 /* This should only happen if the type is unsigned; however, for
2872 buggy programs that use overflowing signed arithmetics even with
2873 -fno-wrapv, this condition may also be true for signed values. */
2874 if (wi::ltu_p (bnd, cst))
2875 return max;
2877 if (TYPE_UNSIGNED (type))
2879 tree tem = fold_binary (GE_EXPR, boolean_type_node, op0,
2880 wide_int_to_tree (type, cst));
2881 if (!tem || integer_nonzerop (tem))
2882 return max;
2885 bnd -= cst;
2888 return bnd;
2890 case FLOOR_DIV_EXPR:
2891 case EXACT_DIV_EXPR:
2892 if (TREE_CODE (op1) != INTEGER_CST
2893 || tree_int_cst_sign_bit (op1))
2894 return max;
2896 bnd = derive_constant_upper_bound (op0);
2897 return wi::udiv_floor (bnd, wi::to_widest (op1));
2899 case BIT_AND_EXPR:
2900 if (TREE_CODE (op1) != INTEGER_CST
2901 || tree_int_cst_sign_bit (op1))
2902 return max;
2903 return wi::to_widest (op1);
2905 case SSA_NAME:
2906 stmt = SSA_NAME_DEF_STMT (op0);
2907 if (gimple_code (stmt) != GIMPLE_ASSIGN
2908 || gimple_assign_lhs (stmt) != op0)
2909 return max;
2910 return derive_constant_upper_bound_assign (stmt);
2912 default:
2913 return max;
2917 /* Emit a -Waggressive-loop-optimizations warning if needed. */
2919 static void
2920 do_warn_aggressive_loop_optimizations (struct loop *loop,
2921 widest_int i_bound, gimple *stmt)
2923 /* Don't warn if the loop doesn't have known constant bound. */
2924 if (!loop->nb_iterations
2925 || TREE_CODE (loop->nb_iterations) != INTEGER_CST
2926 || !warn_aggressive_loop_optimizations
2927 /* To avoid warning multiple times for the same loop,
2928 only start warning when we preserve loops. */
2929 || (cfun->curr_properties & PROP_loops) == 0
2930 /* Only warn once per loop. */
2931 || loop->warned_aggressive_loop_optimizations
2932 /* Only warn if undefined behavior gives us lower estimate than the
2933 known constant bound. */
2934 || wi::cmpu (i_bound, wi::to_widest (loop->nb_iterations)) >= 0
2935 /* And undefined behavior happens unconditionally. */
2936 || !dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (stmt)))
2937 return;
2939 edge e = single_exit (loop);
2940 if (e == NULL)
2941 return;
2943 gimple *estmt = last_stmt (e->src);
2944 char buf[WIDE_INT_PRINT_BUFFER_SIZE];
2945 print_dec (i_bound, buf, TYPE_UNSIGNED (TREE_TYPE (loop->nb_iterations))
2946 ? UNSIGNED : SIGNED);
2947 if (warning_at (gimple_location (stmt), OPT_Waggressive_loop_optimizations,
2948 "iteration %s invokes undefined behavior", buf))
2949 inform (gimple_location (estmt), "within this loop");
2950 loop->warned_aggressive_loop_optimizations = true;
2953 /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT
2954 is true if the loop is exited immediately after STMT, and this exit
2955 is taken at last when the STMT is executed BOUND + 1 times.
2956 REALISTIC is true if BOUND is expected to be close to the real number
2957 of iterations. UPPER is true if we are sure the loop iterates at most
2958 BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */
2960 static void
2961 record_estimate (struct loop *loop, tree bound, const widest_int &i_bound,
2962 gimple *at_stmt, bool is_exit, bool realistic, bool upper)
2964 widest_int delta;
2966 if (dump_file && (dump_flags & TDF_DETAILS))
2968 fprintf (dump_file, "Statement %s", is_exit ? "(exit)" : "");
2969 print_gimple_stmt (dump_file, at_stmt, 0, TDF_SLIM);
2970 fprintf (dump_file, " is %sexecuted at most ",
2971 upper ? "" : "probably ");
2972 print_generic_expr (dump_file, bound, TDF_SLIM);
2973 fprintf (dump_file, " (bounded by ");
2974 print_decu (i_bound, dump_file);
2975 fprintf (dump_file, ") + 1 times in loop %d.\n", loop->num);
2978 /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
2979 real number of iterations. */
2980 if (TREE_CODE (bound) != INTEGER_CST)
2981 realistic = false;
2982 else
2983 gcc_checking_assert (i_bound == wi::to_widest (bound));
2985 /* If we have a guaranteed upper bound, record it in the appropriate
2986 list, unless this is an !is_exit bound (i.e. undefined behavior in
2987 at_stmt) in a loop with known constant number of iterations. */
2988 if (upper
2989 && (is_exit
2990 || loop->nb_iterations == NULL_TREE
2991 || TREE_CODE (loop->nb_iterations) != INTEGER_CST))
2993 struct nb_iter_bound *elt = ggc_alloc<nb_iter_bound> ();
2995 elt->bound = i_bound;
2996 elt->stmt = at_stmt;
2997 elt->is_exit = is_exit;
2998 elt->next = loop->bounds;
2999 loop->bounds = elt;
3002 /* If statement is executed on every path to the loop latch, we can directly
3003 infer the upper bound on the # of iterations of the loop. */
3004 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (at_stmt)))
3005 upper = false;
3007 /* Update the number of iteration estimates according to the bound.
3008 If at_stmt is an exit then the loop latch is executed at most BOUND times,
3009 otherwise it can be executed BOUND + 1 times. We will lower the estimate
3010 later if such statement must be executed on last iteration */
3011 if (is_exit)
3012 delta = 0;
3013 else
3014 delta = 1;
3015 widest_int new_i_bound = i_bound + delta;
3017 /* If an overflow occurred, ignore the result. */
3018 if (wi::ltu_p (new_i_bound, delta))
3019 return;
3021 if (upper && !is_exit)
3022 do_warn_aggressive_loop_optimizations (loop, new_i_bound, at_stmt);
3023 record_niter_bound (loop, new_i_bound, realistic, upper);
3026 /* Records the control iv analyzed in NITER for LOOP if the iv is valid
3027 and doesn't overflow. */
3029 static void
3030 record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
3032 struct control_iv *iv;
3034 if (!niter->control.base || !niter->control.step)
3035 return;
3037 if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
3038 return;
3040 iv = ggc_alloc<control_iv> ();
3041 iv->base = niter->control.base;
3042 iv->step = niter->control.step;
3043 iv->next = loop->control_ivs;
3044 loop->control_ivs = iv;
3046 return;
3049 /* Record the estimate on number of iterations of LOOP based on the fact that
3050 the induction variable BASE + STEP * i evaluated in STMT does not wrap and
3051 its values belong to the range <LOW, HIGH>. REALISTIC is true if the
3052 estimated number of iterations is expected to be close to the real one.
3053 UPPER is true if we are sure the induction variable does not wrap. */
3055 static void
3056 record_nonwrapping_iv (struct loop *loop, tree base, tree step, gimple *stmt,
3057 tree low, tree high, bool realistic, bool upper)
3059 tree niter_bound, extreme, delta;
3060 tree type = TREE_TYPE (base), unsigned_type;
3061 tree orig_base = base;
3063 if (TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3064 return;
3066 if (dump_file && (dump_flags & TDF_DETAILS))
3068 fprintf (dump_file, "Induction variable (");
3069 print_generic_expr (dump_file, TREE_TYPE (base), TDF_SLIM);
3070 fprintf (dump_file, ") ");
3071 print_generic_expr (dump_file, base, TDF_SLIM);
3072 fprintf (dump_file, " + ");
3073 print_generic_expr (dump_file, step, TDF_SLIM);
3074 fprintf (dump_file, " * iteration does not wrap in statement ");
3075 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3076 fprintf (dump_file, " in loop %d.\n", loop->num);
3079 unsigned_type = unsigned_type_for (type);
3080 base = fold_convert (unsigned_type, base);
3081 step = fold_convert (unsigned_type, step);
3083 if (tree_int_cst_sign_bit (step))
3085 wide_int min, max;
3086 extreme = fold_convert (unsigned_type, low);
3087 if (TREE_CODE (orig_base) == SSA_NAME
3088 && TREE_CODE (high) == INTEGER_CST
3089 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
3090 && get_range_info (orig_base, &min, &max) == VR_RANGE
3091 && wi::gts_p (high, max))
3092 base = wide_int_to_tree (unsigned_type, max);
3093 else if (TREE_CODE (base) != INTEGER_CST
3094 && dominated_by_p (CDI_DOMINATORS,
3095 loop->latch, gimple_bb (stmt)))
3096 base = fold_convert (unsigned_type, high);
3097 delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
3098 step = fold_build1 (NEGATE_EXPR, unsigned_type, step);
3100 else
3102 wide_int min, max;
3103 extreme = fold_convert (unsigned_type, high);
3104 if (TREE_CODE (orig_base) == SSA_NAME
3105 && TREE_CODE (low) == INTEGER_CST
3106 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base))
3107 && get_range_info (orig_base, &min, &max) == VR_RANGE
3108 && wi::gts_p (min, low))
3109 base = wide_int_to_tree (unsigned_type, min);
3110 else if (TREE_CODE (base) != INTEGER_CST
3111 && dominated_by_p (CDI_DOMINATORS,
3112 loop->latch, gimple_bb (stmt)))
3113 base = fold_convert (unsigned_type, low);
3114 delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
3117 /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
3118 would get out of the range. */
3119 niter_bound = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step);
3120 widest_int max = derive_constant_upper_bound (niter_bound);
3121 record_estimate (loop, niter_bound, max, stmt, false, realistic, upper);
3124 /* Determine information about number of iterations a LOOP from the index
3125 IDX of a data reference accessed in STMT. RELIABLE is true if STMT is
3126 guaranteed to be executed in every iteration of LOOP. Callback for
3127 for_each_index. */
3129 struct ilb_data
3131 struct loop *loop;
3132 gimple *stmt;
3135 static bool
3136 idx_infer_loop_bounds (tree base, tree *idx, void *dta)
3138 struct ilb_data *data = (struct ilb_data *) dta;
3139 tree ev, init, step;
3140 tree low, high, type, next;
3141 bool sign, upper = true, at_end = false;
3142 struct loop *loop = data->loop;
3144 if (TREE_CODE (base) != ARRAY_REF)
3145 return true;
3147 /* For arrays at the end of the structure, we are not guaranteed that they
3148 do not really extend over their declared size. However, for arrays of
3149 size greater than one, this is unlikely to be intended. */
3150 if (array_at_struct_end_p (base))
3152 at_end = true;
3153 upper = false;
3156 struct loop *dloop = loop_containing_stmt (data->stmt);
3157 if (!dloop)
3158 return true;
3160 ev = analyze_scalar_evolution (dloop, *idx);
3161 ev = instantiate_parameters (loop, ev);
3162 init = initial_condition (ev);
3163 step = evolution_part_in_loop_num (ev, loop->num);
3165 if (!init
3166 || !step
3167 || TREE_CODE (step) != INTEGER_CST
3168 || integer_zerop (step)
3169 || tree_contains_chrecs (init, NULL)
3170 || chrec_contains_symbols_defined_in_loop (init, loop->num))
3171 return true;
3173 low = array_ref_low_bound (base);
3174 high = array_ref_up_bound (base);
3176 /* The case of nonconstant bounds could be handled, but it would be
3177 complicated. */
3178 if (TREE_CODE (low) != INTEGER_CST
3179 || !high
3180 || TREE_CODE (high) != INTEGER_CST)
3181 return true;
3182 sign = tree_int_cst_sign_bit (step);
3183 type = TREE_TYPE (step);
3185 /* The array of length 1 at the end of a structure most likely extends
3186 beyond its bounds. */
3187 if (at_end
3188 && operand_equal_p (low, high, 0))
3189 return true;
3191 /* In case the relevant bound of the array does not fit in type, or
3192 it does, but bound + step (in type) still belongs into the range of the
3193 array, the index may wrap and still stay within the range of the array
3194 (consider e.g. if the array is indexed by the full range of
3195 unsigned char).
3197 To make things simpler, we require both bounds to fit into type, although
3198 there are cases where this would not be strictly necessary. */
3199 if (!int_fits_type_p (high, type)
3200 || !int_fits_type_p (low, type))
3201 return true;
3202 low = fold_convert (type, low);
3203 high = fold_convert (type, high);
3205 if (sign)
3206 next = fold_binary (PLUS_EXPR, type, low, step);
3207 else
3208 next = fold_binary (PLUS_EXPR, type, high, step);
3210 if (tree_int_cst_compare (low, next) <= 0
3211 && tree_int_cst_compare (next, high) <= 0)
3212 return true;
3214 /* If access is not executed on every iteration, we must ensure that overlow
3215 may not make the access valid later. */
3216 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt))
3217 && scev_probably_wraps_p (NULL_TREE,
3218 initial_condition_in_loop_num (ev, loop->num),
3219 step, data->stmt, loop, true))
3220 upper = false;
3222 record_nonwrapping_iv (loop, init, step, data->stmt, low, high, false, upper);
3223 return true;
3226 /* Determine information about number of iterations a LOOP from the bounds
3227 of arrays in the data reference REF accessed in STMT. RELIABLE is true if
3228 STMT is guaranteed to be executed in every iteration of LOOP.*/
3230 static void
3231 infer_loop_bounds_from_ref (struct loop *loop, gimple *stmt, tree ref)
3233 struct ilb_data data;
3235 data.loop = loop;
3236 data.stmt = stmt;
3237 for_each_index (&ref, idx_infer_loop_bounds, &data);
/* Determine information about number of iterations of a LOOP from the way
   arrays are used in STMT.  */

static void
infer_loop_bounds_from_array (struct loop *loop, gimple *stmt)
{
  if (is_gimple_assign (stmt))
    {
      tree op0 = gimple_assign_lhs (stmt);
      tree op1 = gimple_assign_rhs1 (stmt);

      /* For each memory access, analyze its access function
	 and record a bound on the loop iteration domain.  */
      if (REFERENCE_CLASS_P (op0))
	infer_loop_bounds_from_ref (loop, stmt, op0);

      if (REFERENCE_CLASS_P (op1))
	infer_loop_bounds_from_ref (loop, stmt, op1);
    }
  else if (is_gimple_call (stmt))
    {
      tree arg, lhs;
      unsigned i, n = gimple_call_num_args (stmt);

      /* Calls may reference memory through their LHS and arguments;
	 inspect each of them.  */
      lhs = gimple_call_lhs (stmt);
      if (lhs && REFERENCE_CLASS_P (lhs))
	infer_loop_bounds_from_ref (loop, stmt, lhs);

      for (i = 0; i < n; i++)
	{
	  arg = gimple_call_arg (stmt, i);
	  if (REFERENCE_CLASS_P (arg))
	    infer_loop_bounds_from_ref (loop, stmt, arg);
	}
    }
}
/* Determine information about number of iterations of a LOOP from the fact
   that pointer arithmetics in STMT does not overflow.  */

static void
infer_loop_bounds_from_pointer_arith (struct loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;
  tree var, ptr;

  /* Only POINTER_PLUS_EXPR assignments to an SSA name are of interest.  */
  if (!is_gimple_assign (stmt)
      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
    return;

  def = gimple_assign_lhs (stmt);
  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* The conclusion only holds for types whose arithmetic is assumed
     not to wrap.  */
  type = TREE_TYPE (def);
  if (!nowrap_type_p (type))
    return;

  ptr = gimple_assign_rhs1 (stmt);
  if (!expr_invariant_in_loop_p (loop, ptr))
    return;

  var = gimple_assign_rhs2 (stmt);
  if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
    return;

  scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  /* Give up unless the evolution is an affine IV with constant step and
     loop-invariant base.  */
  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);

  /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
     produce a NULL pointer.  The contrary would mean NULL points to an object,
     while NULL is supposed to compare unequal with the address of all objects.
     Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
     NULL pointer since that would mean wrapping, which we assume here not to
     happen.  So, we can exclude NULL from the valid range of pointer
     arithmetic.  */
  if (flag_delete_null_pointer_checks && int_cst_value (low) == 0)
    low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type)));

  record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
}
/* Determine information about number of iterations of a LOOP from the fact
   that signed arithmetics in STMT does not overflow.  */

static void
infer_loop_bounds_from_signedness (struct loop *loop, gimple *stmt)
{
  tree def, base, step, scev, type, low, high;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  def = gimple_assign_lhs (stmt);

  if (TREE_CODE (def) != SSA_NAME)
    return;

  /* Only integral types with undefined overflow let us conclude
     anything.  */
  type = TREE_TYPE (def);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_OVERFLOW_UNDEFINED (type))
    return;

  scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
  if (chrec_contains_undetermined (scev))
    return;

  base = initial_condition_in_loop_num (scev, loop->num);
  step = evolution_part_in_loop_num (scev, loop->num);

  /* Give up unless the evolution is an affine IV with constant step and
     loop-invariant base.  */
  if (!base || !step
      || TREE_CODE (step) != INTEGER_CST
      || tree_contains_chrecs (base, NULL)
      || chrec_contains_symbols_defined_in_loop (base, loop->num))
    return;

  /* DEF must stay inside the full range of its type, since overflow
     would be undefined.  */
  low = lower_bound_in_type (type, type);
  high = upper_bound_in_type (type, type);

  record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true);
}
/* The following analyzers are extracting informations on the bounds
   of LOOP from the following undefined behaviors:

   - data references should not access elements over the statically
     allocated size,

   - signed variables should not overflow when flag_wrapv is not set.  */

static void
infer_loop_bounds_from_undefined (struct loop *loop)
{
  unsigned i;
  basic_block *bbs;
  gimple_stmt_iterator bsi;
  basic_block bb;
  bool reliable;

  bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      /* If BB is not executed in each iteration of the loop, we cannot
	 use the operations in it to infer reliable upper bound on the
	 # of iterations of the loop.  However, we can use it as a guess.
	 Reliable guesses come only from array bounds.  */
      reliable = dominated_by_p (CDI_DOMINATORS, loop->latch, bb);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);

	  infer_loop_bounds_from_array (loop, stmt);

	  /* Overflow-based deductions are only valid for statements
	     executed on every iteration.  */
	  if (reliable)
	    {
	      infer_loop_bounds_from_signedness (loop, stmt);
	      infer_loop_bounds_from_pointer_arith (loop, stmt);
	    }
	}
    }

  free (bbs);
}
3424 /* Compare wide ints, callback for qsort. */
3426 static int
3427 wide_int_cmp (const void *p1, const void *p2)
3429 const widest_int *d1 = (const widest_int *) p1;
3430 const widest_int *d2 = (const widest_int *) p2;
3431 return wi::cmpu (*d1, *d2);
3434 /* Return index of BOUND in BOUNDS array sorted in increasing order.
3435 Lookup by binary search. */
3437 static int
3438 bound_index (vec<widest_int> bounds, const widest_int &bound)
3440 unsigned int end = bounds.length ();
3441 unsigned int begin = 0;
3443 /* Find a matching index by means of a binary search. */
3444 while (begin != end)
3446 unsigned int middle = (begin + end) / 2;
3447 widest_int index = bounds[middle];
3449 if (index == bound)
3450 return middle;
3451 else if (wi::ltu_p (index, bound))
3452 begin = middle + 1;
3453 else
3454 end = middle;
3456 gcc_unreachable ();
/* We recorded loop bounds only for statements dominating loop latch (and thus
   executed each loop iteration).  If there are any bounds on statements not
   dominating the loop latch we can improve the estimate by walking the loop
   body and seeing if every path from loop header to loop latch contains
   some bounded statement.  */

static void
discover_iteration_bound_by_body_walk (struct loop *loop)
{
  struct nb_iter_bound *elt;
  auto_vec<widest_int> bounds;
  vec<vec<basic_block> > queues = vNULL;
  vec<basic_block> queue = vNULL;
  ptrdiff_t queue_index;
  ptrdiff_t latch_index = 0;

  /* Discover what bounds may interest us.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      widest_int bound = elt->bound;

      /* Exit terminates loop at given iteration, while non-exits produce
	 undefined effect on the next iteration.  */
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      /* Only bounds that improve on the already-known upper bound are
	 worth considering.  */
      if (!loop->any_upper_bound
	  || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
	bounds.safe_push (bound);
    }

  /* Exit early if there is nothing to do.  */
  if (!bounds.exists ())
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");

  /* Sort the bounds in decreasing order.  */
  bounds.qsort (wide_int_cmp);

  /* For every basic block record the lowest bound that is guaranteed to
     terminate the loop.  */

  hash_map<basic_block, ptrdiff_t> bb_bounds;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      widest_int bound = elt->bound;
      if (!elt->is_exit)
	{
	  bound += 1;
	  /* If an overflow occurred, ignore the result.  */
	  if (bound == 0)
	    continue;
	}

      if (!loop->any_upper_bound
	  || wi::ltu_p (bound, loop->nb_iterations_upper_bound))
	{
	  ptrdiff_t index = bound_index (bounds, bound);
	  ptrdiff_t *entry = bb_bounds.get (gimple_bb (elt->stmt));
	  if (!entry)
	    bb_bounds.put (gimple_bb (elt->stmt), index);
	  else if ((ptrdiff_t)*entry > index)
	    *entry = index;
	}
    }

  hash_map<basic_block, ptrdiff_t> block_priority;

  /* Perform shortest path discovery loop->header ... loop->latch.

     The "distance" is given by the smallest loop bound of basic block
     present in the path and we look for path with largest smallest bound
     on it.

     To avoid the need for fibonacci heap on double ints we simply compress
     double ints into indexes to BOUNDS array and then represent the queue
     as arrays of queues for every index.
     Index of BOUNDS.length() means that the execution of given BB has
     no bounds determined.

     VISITED is a pointer map translating basic block into smallest index
     it was inserted into the priority queue with.  */
  latch_index = -1;

  /* Start walk in loop header with index set to infinite bound.  */
  queue_index = bounds.length ();
  queues.safe_grow_cleared (queue_index + 1);
  queue.safe_push (loop->header);
  queues[queue_index] = queue;
  block_priority.put (loop->header, queue_index);

  for (; queue_index >= 0; queue_index--)
    {
      if (latch_index < queue_index)
	{
	  while (queues[queue_index].length ())
	    {
	      basic_block bb;
	      ptrdiff_t bound_index = queue_index;
	      edge e;
	      edge_iterator ei;

	      queue = queues[queue_index];
	      bb = queue.pop ();

	      /* OK, we later inserted the BB with lower priority, skip it.  */
	      if (*block_priority.get (bb) > queue_index)
		continue;

	      /* See if we can improve the bound.  */
	      ptrdiff_t *entry = bb_bounds.get (bb);
	      if (entry && *entry < bound_index)
		bound_index = *entry;

	      /* Insert successors into the queue, watch for latch edge
		 and record greatest index we saw.  */
	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  bool insert = false;

		  if (loop_exit_edge_p (loop, e))
		    continue;

		  if (e == loop_latch_edge (loop)
		      && latch_index < bound_index)
		    latch_index = bound_index;
		  else if (!(entry = block_priority.get (e->dest)))
		    {
		      insert = true;
		      block_priority.put (e->dest, bound_index);
		    }
		  else if (*entry < bound_index)
		    {
		      insert = true;
		      *entry = bound_index;
		    }

		  if (insert)
		    queues[bound_index].safe_push (e->dest);
		}
	    }
	}
      queues[queue_index].release ();
    }

  gcc_assert (latch_index >= 0);
  if ((unsigned)latch_index < bounds.length ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Found better loop bound ");
	  print_decu (bounds[latch_index], dump_file);
	  fprintf (dump_file, "\n");
	}
      record_niter_bound (loop, bounds[latch_index], false, true);
    }

  queues.release ();
}
/* See if every path cross the loop goes through a statement that is known
   to not execute at the last iteration.  In that case we can decrease
   iteration count by 1.  */

static void
maybe_lower_iteration_bound (struct loop *loop)
{
  hash_set<gimple *> *not_executed_last_iteration = NULL;
  struct nb_iter_bound *elt;
  bool found_exit = false;
  auto_vec<basic_block> queue;
  bitmap visited;

  /* Collect all statements with interesting (i.e. lower than
     nb_iterations_upper_bound) bound on them.

     TODO: Due to the way record_estimate choose estimates to store, the bounds
     will be always nb_iterations_upper_bound-1.  We can change this to record
     also statements not dominating the loop latch and update the walk below
     to the shortest path algorithm.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, loop->nb_iterations_upper_bound))
	{
	  /* Allocate the set lazily; most loops have no such statement.  */
	  if (!not_executed_last_iteration)
	    not_executed_last_iteration = new hash_set<gimple *>;
	  not_executed_last_iteration->add (elt->stmt);
	}
    }
  if (!not_executed_last_iteration)
    return;

  /* Start DFS walk in the loop header and see if we can reach the
     loop latch or any of the exits (including statements with side
     effects that may terminate the loop otherwise) without visiting
     any of the statements known to have undefined effect on the last
     iteration.  */
  queue.safe_push (loop->header);
  visited = BITMAP_ALLOC (NULL);
  bitmap_set_bit (visited, loop->header->index);
  found_exit = false;

  do
    {
      basic_block bb = queue.pop ();
      gimple_stmt_iterator gsi;
      bool stmt_found = false;

      /* Loop for possible exits and statements bounding the execution.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (not_executed_last_iteration->contains (stmt))
	    {
	      stmt_found = true;
	      break;
	    }
	  if (gimple_has_side_effects (stmt))
	    {
	      found_exit = true;
	      break;
	    }
	}
      if (found_exit)
	break;

      /* If no bounding statement is found, continue the walk.  */
      if (!stmt_found)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (loop_exit_edge_p (loop, e)
		  || e == loop_latch_edge (loop))
		{
		  found_exit = true;
		  break;
		}
	      /* bitmap_set_bit returns true only for bits not yet set,
		 so each block is queued at most once.  */
	      if (bitmap_set_bit (visited, e->dest->index))
		queue.safe_push (e->dest);
	    }
	}
    }
  while (queue.length () && !found_exit);

  /* If every path through the loop reach bounding statement before exit,
     then we know the last iteration of the loop will have undefined effect
     and we can decrease number of iterations.  */

  if (!found_exit)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Reducing loop iteration estimate by 1; "
		 "undefined statement must be executed at the last iteration.\n");
      record_niter_bound (loop, loop->nb_iterations_upper_bound - 1,
			  false, true);
    }

  BITMAP_FREE (visited);
  delete not_executed_last_iteration;
}
/* Records estimates on numbers of iterations of LOOP.  */

static void
estimate_numbers_of_iterations_loop (struct loop *loop)
{
  vec<edge> exits;
  tree niter, type;
  unsigned i;
  struct tree_niter_desc niter_desc;
  edge ex;
  widest_int bound;
  edge likely_exit;

  /* Give up if we already have tried to compute an estimation.  */
  if (loop->estimate_state != EST_NOT_COMPUTED)
    return;

  loop->estimate_state = EST_AVAILABLE;

  /* If we have a measured profile, use it to estimate the number of
     iterations.  Normally this is recorded by branch_prob right after
     reading the profile.  In case we however found a new loop, record the
     information here.

     Explicitly check for profile status so we do not report
     wrong prediction hitrates for guessed loop iterations heuristics.
     Do not recompute already recorded bounds - we ought to be better on
     updating iteration bounds than updating profile in general and thus
     recomputing iteration bounds later in the compilation process will just
     introduce random roundoff errors.  */
  if (!loop->any_estimate
      && loop->header->count != 0
      && profile_status_for_fn (cfun) >= PROFILE_READ)
    {
      gcov_type nit = expected_loop_iterations_unbounded (loop);
      bound = gcov_type_to_wide_int (nit);
      record_niter_bound (loop, bound, true, false);
    }

  /* Ensure that loop->nb_iterations is computed if possible.  If it turns out
     to be constant, we avoid undefined behavior implied bounds and instead
     diagnose those loops with -Waggressive-loop-optimizations.  */
  number_of_latch_executions (loop);

  exits = get_loop_exit_edges (loop);
  likely_exit = single_likely_exit (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (!number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	continue;

      niter = niter_desc.niter;
      type = TREE_TYPE (niter);
      /* Guard the niter expression with MAY_BE_ZERO unless it is
	 known constant.  */
      if (TREE_CODE (niter_desc.may_be_zero) != INTEGER_CST)
	niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
			build_int_cst (type, 0),
			niter);
      record_estimate (loop, niter, niter_desc.max,
		       last_stmt (ex->src),
		       true, ex == likely_exit, true);
      record_control_iv (loop, &niter_desc);
    }
  exits.release ();

  if (flag_aggressive_loop_optimizations)
    infer_loop_bounds_from_undefined (loop);

  discover_iteration_bound_by_body_walk (loop);

  maybe_lower_iteration_bound (loop);

  /* If we know the exact number of iterations of this loop, try to
     not break code with undefined behavior by not recording smaller
     maximum number of iterations.  */
  if (loop->nb_iterations
      && TREE_CODE (loop->nb_iterations) == INTEGER_CST)
    {
      loop->any_upper_bound = true;
      loop->nb_iterations_upper_bound = wi::to_widest (loop->nb_iterations);
    }
}
3814 /* Sets NIT to the estimated number of executions of the latch of the
3815 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
3816 large as the number of iterations. If we have no reliable estimate,
3817 the function returns false, otherwise returns true. */
3819 bool
3820 estimated_loop_iterations (struct loop *loop, widest_int *nit)
3822 /* When SCEV information is available, try to update loop iterations
3823 estimate. Otherwise just return whatever we recorded earlier. */
3824 if (scev_initialized_p ())
3825 estimate_numbers_of_iterations_loop (loop);
3827 return (get_estimated_loop_iterations (loop, nit));
3830 /* Similar to estimated_loop_iterations, but returns the estimate only
3831 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3832 on the number of iterations of LOOP could not be derived, returns -1. */
3834 HOST_WIDE_INT
3835 estimated_loop_iterations_int (struct loop *loop)
3837 widest_int nit;
3838 HOST_WIDE_INT hwi_nit;
3840 if (!estimated_loop_iterations (loop, &nit))
3841 return -1;
3843 if (!wi::fits_shwi_p (nit))
3844 return -1;
3845 hwi_nit = nit.to_shwi ();
3847 return hwi_nit < 0 ? -1 : hwi_nit;
3851 /* Sets NIT to an upper bound for the maximum number of executions of the
3852 latch of the LOOP. If we have no reliable estimate, the function returns
3853 false, otherwise returns true. */
3855 bool
3856 max_loop_iterations (struct loop *loop, widest_int *nit)
3858 /* When SCEV information is available, try to update loop iterations
3859 estimate. Otherwise just return whatever we recorded earlier. */
3860 if (scev_initialized_p ())
3861 estimate_numbers_of_iterations_loop (loop);
3863 return get_max_loop_iterations (loop, nit);
3866 /* Similar to max_loop_iterations, but returns the estimate only
3867 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3868 on the number of iterations of LOOP could not be derived, returns -1. */
3870 HOST_WIDE_INT
3871 max_loop_iterations_int (struct loop *loop)
3873 widest_int nit;
3874 HOST_WIDE_INT hwi_nit;
3876 if (!max_loop_iterations (loop, &nit))
3877 return -1;
3879 if (!wi::fits_shwi_p (nit))
3880 return -1;
3881 hwi_nit = nit.to_shwi ();
3883 return hwi_nit < 0 ? -1 : hwi_nit;
3886 /* Sets NIT to an likely upper bound for the maximum number of executions of the
3887 latch of the LOOP. If we have no reliable estimate, the function returns
3888 false, otherwise returns true. */
3890 bool
3891 likely_max_loop_iterations (struct loop *loop, widest_int *nit)
3893 /* When SCEV information is available, try to update loop iterations
3894 estimate. Otherwise just return whatever we recorded earlier. */
3895 if (scev_initialized_p ())
3896 estimate_numbers_of_iterations_loop (loop);
3898 return get_likely_max_loop_iterations (loop, nit);
3901 /* Similar to max_loop_iterations, but returns the estimate only
3902 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3903 on the number of iterations of LOOP could not be derived, returns -1. */
3905 HOST_WIDE_INT
3906 likely_max_loop_iterations_int (struct loop *loop)
3908 widest_int nit;
3909 HOST_WIDE_INT hwi_nit;
3911 if (!likely_max_loop_iterations (loop, &nit))
3912 return -1;
3914 if (!wi::fits_shwi_p (nit))
3915 return -1;
3916 hwi_nit = nit.to_shwi ();
3918 return hwi_nit < 0 ? -1 : hwi_nit;
3921 /* Returns an estimate for the number of executions of statements
3922 in the LOOP. For statements before the loop exit, this exceeds
3923 the number of execution of the latch by one. */
3925 HOST_WIDE_INT
3926 estimated_stmt_executions_int (struct loop *loop)
3928 HOST_WIDE_INT nit = estimated_loop_iterations_int (loop);
3929 HOST_WIDE_INT snit;
3931 if (nit == -1)
3932 return -1;
3934 snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1);
3936 /* If the computation overflows, return -1. */
3937 return snit < 0 ? -1 : snit;
3940 /* Sets NIT to the maximum number of executions of the latch of the
3941 LOOP, plus one. If we have no reliable estimate, the function returns
3942 false, otherwise returns true. */
3944 bool
3945 max_stmt_executions (struct loop *loop, widest_int *nit)
3947 widest_int nit_minus_one;
3949 if (!max_loop_iterations (loop, nit))
3950 return false;
3952 nit_minus_one = *nit;
3954 *nit += 1;
3956 return wi::gtu_p (*nit, nit_minus_one);
3959 /* Sets NIT to the estimated maximum number of executions of the latch of the
3960 LOOP, plus one. If we have no likely estimate, the function returns
3961 false, otherwise returns true. */
3963 bool
3964 likely_max_stmt_executions (struct loop *loop, widest_int *nit)
3966 widest_int nit_minus_one;
3968 if (!likely_max_loop_iterations (loop, nit))
3969 return false;
3971 nit_minus_one = *nit;
3973 *nit += 1;
3975 return wi::gtu_p (*nit, nit_minus_one);
3978 /* Sets NIT to the estimated number of executions of the latch of the
3979 LOOP, plus one. If we have no reliable estimate, the function returns
3980 false, otherwise returns true. */
3982 bool
3983 estimated_stmt_executions (struct loop *loop, widest_int *nit)
3985 widest_int nit_minus_one;
3987 if (!estimated_loop_iterations (loop, nit))
3988 return false;
3990 nit_minus_one = *nit;
3992 *nit += 1;
3994 return wi::gtu_p (*nit, nit_minus_one);
3997 /* Records estimates on numbers of iterations of loops. */
3999 void
4000 estimate_numbers_of_iterations (void)
4002 struct loop *loop;
4004 /* We don't want to issue signed overflow warnings while getting
4005 loop iteration estimates. */
4006 fold_defer_overflow_warnings ();
4008 FOR_EACH_LOOP (loop, 0)
4010 estimate_numbers_of_iterations_loop (loop);
4013 fold_undefer_and_ignore_overflow_warnings ();
/* Returns true if statement S1 dominates statement S2.  */

bool
stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* S1 without a containing block, or S1 identical to S2, is treated as
     dominating.  */
  if (!bb1
      || s1 == s2)
    return true;

  if (bb1 == bb2)
    {
      gimple_stmt_iterator bsi;

      /* Within one block: a PHI never follows a non-PHI, so S2 being a
	 PHI means S1 (a different statement) cannot dominate it, while
	 S1 being a PHI dominates any non-PHI S2 here.  */
      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      /* Otherwise scan the block: S1 dominates S2 iff S1 appears before
	 S2 in statement order.  */
      for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
	if (gsi_stmt (bsi) == s1)
	  return true;

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Returns true when we can prove that the number of executions of
   STMT in the loop is at most NITER, according to the bound on
   the number of executions of the statement NITER_BOUND->stmt recorded in
   NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT.

   ??? This code can become quite a CPU hog - we can have many bounds,
   and large basic block forcing stmt_dominates_stmt_p to be queried
   many times on a large basic blocks, so the whole thing is O(n^2)
   for scev_probably_wraps_p invocation (that can be done n times).

   It would make more sense (and give better answers) to remember BB
   bounds computed by discover_iteration_bound_by_body_walk.  */

static bool
n_of_executions_at_most (gimple *stmt,
			 struct nb_iter_bound *niter_bound,
			 tree niter)
{
  widest_int bound = niter_bound->bound;
  tree nit_type = TREE_TYPE (niter), e;
  enum tree_code cmp;

  gcc_assert (TYPE_UNSIGNED (nit_type));

  /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
     the number of iterations is small.  */
  if (!wi::fits_to_tree_p (bound, nit_type))
    return false;

  /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
     times.  This means that:

     -- if NITER_BOUND->is_exit is true, then everything after
	it at most NITER_BOUND->bound times.

     -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
	is executed, then NITER_BOUND->stmt is executed as well in the same
	iteration then STMT is executed at most NITER_BOUND->bound + 1 times.

	If we can determine that NITER_BOUND->stmt is always executed
	after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
	We conclude that if both statements belong to the same
	basic block and STMT is before NITER_BOUND->stmt and there are no
	statements with side effects in between.  */

  if (niter_bound->is_exit)
    {
      if (stmt == niter_bound->stmt
	  || !stmt_dominates_stmt_p (niter_bound->stmt, stmt))
	return false;
      cmp = GE_EXPR;
    }
  else
    {
      if (!stmt_dominates_stmt_p (niter_bound->stmt, stmt))
	{
	  gimple_stmt_iterator bsi;
	  if (gimple_bb (stmt) != gimple_bb (niter_bound->stmt)
	      || gimple_code (stmt) == GIMPLE_PHI
	      || gimple_code (niter_bound->stmt) == GIMPLE_PHI)
	    return false;

	  /* By stmt_dominates_stmt_p we already know that STMT appears
	     before NITER_BOUND->STMT.  Still need to test that the loop
	     can not be terminated by a side effect in between.  */
	  for (bsi = gsi_for_stmt (stmt); gsi_stmt (bsi) != niter_bound->stmt;
	       gsi_next (&bsi))
	    if (gimple_has_side_effects (gsi_stmt (bsi)))
	      return false;
	  bound += 1;
	  /* Bail out on overflow of the incremented bound or when it no
	     longer fits NIT_TYPE.  */
	  if (bound == 0
	      || !wi::fits_to_tree_p (bound, nit_type))
	    return false;
	}
      cmp = GT_EXPR;
    }

  e = fold_binary (cmp, boolean_type_node,
		   niter, wide_int_to_tree (nit_type, bound));
  return e && integer_nonzerop (e);
}
4129 /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
4131 bool
4132 nowrap_type_p (tree type)
4134 if (ANY_INTEGRAL_TYPE_P (type)
4135 && TYPE_OVERFLOW_UNDEFINED (type))
4136 return true;
4138 if (POINTER_TYPE_P (type))
4139 return true;
4141 return false;
/* Return true if we can prove LOOP is exited before evolution of induction
   variable {BASE, STEP} overflows with respect to its type bound.  */

static bool
loop_exits_before_overflow (tree base, tree step,
			    gimple *at_stmt, struct loop *loop)
{
  widest_int niter;
  struct control_iv *civ;
  struct nb_iter_bound *bound;
  tree e, delta, step_abs, unsigned_base;
  tree type = TREE_TYPE (step);
  tree unsigned_type, valid_niter;

  /* Don't issue signed overflow warnings.  */
  fold_defer_overflow_warnings ();

  /* Compute the number of iterations before we reach the bound of the
     type, and verify that the loop is exited before this occurs.  */
  unsigned_type = unsigned_type_for (type);
  unsigned_base = fold_convert (unsigned_type, base);

  if (tree_int_cst_sign_bit (step))
    {
      /* Negative step: distance to the type's lower bound.  */
      tree extreme = fold_convert (unsigned_type,
				   lower_bound_in_type (type, type));
      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
			      fold_convert (unsigned_type, step));
    }
  else
    {
      /* Positive step: distance to the type's upper bound.  */
      tree extreme = fold_convert (unsigned_type,
				   upper_bound_in_type (type, type));
      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
      step_abs = fold_convert (unsigned_type, step);
    }

  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);

  estimate_numbers_of_iterations_loop (loop);

  /* First try: the recorded maximum iteration count of the loop is
     provably smaller than the number of safe iterations.  */
  if (max_loop_iterations (loop, &niter)
      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
			   wide_int_to_tree (TREE_TYPE (valid_niter),
					     niter))) != NULL
      && integer_nonzerop (e))
    {
      fold_undefer_and_ignore_overflow_warnings ();
      return true;
    }
  /* Second try: some per-statement bound already limits the number of
     executions of AT_STMT to the safe range.  */
  if (at_stmt)
    for (bound = loop->bounds; bound; bound = bound->next)
      {
	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
	  {
	    fold_undefer_and_ignore_overflow_warnings ();
	    return true;
	  }
      }
  fold_undefer_and_ignore_overflow_warnings ();

  /* Try to prove loop is exited before {base, step} overflows with the
     help of analyzed loop control IV.  This is done only for IVs with
     constant step because otherwise we don't have the information.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      tree stop = (TREE_CODE (base) == SSA_NAME) ? base : NULL;

      for (civ = loop->control_ivs; civ; civ = civ->next)
	{
	  enum tree_code code;
	  tree stepped, extreme, civ_type = TREE_TYPE (civ->step);

	  /* Have to consider type difference because operand_equal_p ignores
	     that for constants.  */
	  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
	      || element_precision (type) != element_precision (civ_type))
	    continue;

	  /* Only consider control IV with same step.  */
	  if (!operand_equal_p (step, civ->step, 0))
	    continue;

	  /* Done proving if this is a no-overflow control IV.  */
	  if (operand_equal_p (base, civ->base, 0)
	      /* Control IV is recorded after expanding simple operations,
		 Here we compare it against expanded base too.  */
	      || operand_equal_p (expand_simple_operations (base),
				  civ->base, 0))
	    return true;

	  /* If this is a before stepping control IV, in other words, we have

	       {civ_base, step} = {base + step, step}

	     Because civ {base + step, step} doesn't overflow during loop
	     iterations, {base, step} will not overflow if we can prove the
	     operation "base + step" does not overflow.  Specifically, we try
	     to prove below conditions are satisfied:

	       base <= UPPER_BOUND (type) - step  ;;step > 0
	       base >= LOWER_BOUND (type) - step  ;;step < 0

	     by proving the reverse conditions are false using loop's initial
	     condition.  */
	  if (POINTER_TYPE_P (TREE_TYPE (base)))
	    code = POINTER_PLUS_EXPR;
	  else
	    code = PLUS_EXPR;

	  stepped = fold_build2 (code, TREE_TYPE (base), base, step);
	  if (operand_equal_p (stepped, civ->base, 0))
	    {
	      if (tree_int_cst_sign_bit (step))
		{
		  code = LT_EXPR;
		  extreme = lower_bound_in_type (type, type);
		}
	      else
		{
		  code = GT_EXPR;
		  extreme = upper_bound_in_type (type, type);
		}
	      extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
	      e = fold_build2 (code, boolean_type_node, base, extreme);
	      e = simplify_using_initial_conditions (loop, e, stop);
	      if (integer_zerop (e))
		return true;
	    }
	}
    }

  return false;
}
4281 /* VAR is scev variable whose evolution part is constant STEP, this function
4282 proves that VAR can't overflow by using value range info. If VAR's value
4283 range is [MIN, MAX], it can be proven by:
4284 MAX + step doesn't overflow ; if step > 0
4286 MIN + step doesn't underflow ; if step < 0.
4288 We can only do this if var is computed in every loop iteration, i.e, var's
4289 definition has to dominate loop latch. Consider below example:
4292 unsigned int i;
4294 <bb 3>:
4296 <bb 4>:
4297 # RANGE [0, 4294967294] NONZERO 65535
4298 # i_21 = PHI <0(3), i_18(9)>
4299 if (i_21 != 0)
4300 goto <bb 6>;
4301 else
4302 goto <bb 8>;
4304 <bb 6>:
4305 # RANGE [0, 65533] NONZERO 65535
4306 _6 = i_21 + 4294967295;
4307 # RANGE [0, 65533] NONZERO 65535
4308 _7 = (long unsigned int) _6;
4309 # RANGE [0, 524264] NONZERO 524280
4310 _8 = _7 * 8;
4311 # PT = nonlocal escaped
4312 _9 = a_14 + _8;
4313 *_9 = 0;
4315 <bb 8>:
4316 # RANGE [1, 65535] NONZERO 65535
4317 i_18 = i_21 + 1;
4318 if (i_18 >= 65535)
4319 goto <bb 10>;
4320 else
4321 goto <bb 9>;
4323 <bb 9>:
4324 goto <bb 4>;
4326 <bb 10>:
4327 return;
4330 VAR _6 doesn't overflow only with pre-condition (i_21 != 0), here we
   can't use _6 to prove no-overflow for _7.  In fact, var _7 takes value
4332 sequence (4294967295, 0, 1, ..., 65533) in loop life time, rather than
4333 (4294967295, 4294967296, ...). */
static bool
scev_var_range_cant_overflow (tree var, tree step, struct loop *loop)
{
  tree type;
  wide_int minv, maxv, diff, step_wi;
  enum value_range_type rtype;

  /* Only constant steps on integral SSA names are handled.  */
  if (TREE_CODE (step) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (var)))
    return false;

  /* Check if VAR evaluates in every loop iteration.  It's not the case
     if VAR is default definition or does not dominate loop's latch.  */
  basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
  if (!def_bb || !dominated_by_p (CDI_DOMINATORS, loop->latch, def_bb))
    return false;

  rtype = get_range_info (var, &minv, &maxv);
  if (rtype != VR_RANGE)
    return false;

  /* VAR is a scev whose evolution part is STEP and value range info
     is [MIN, MAX], we can prove it does not overflow by the conditions:

       type_MAX - MAX >= step   ; if step > 0
       MIN - type_MIN >= |step| ; if step < 0.

     Or VAR must take value outside of value range, which is not true.  */
  step_wi = step;
  type = TREE_TYPE (var);
  if (tree_int_cst_sign_bit (step))
    {
      /* Negative step: headroom below MIN down to the type minimum.  */
      diff = lower_bound_in_type (type, type);
      diff = minv - diff;
      step_wi = - step_wi;
    }
  else
    {
      /* Positive step: headroom above MAX up to the type maximum.  */
      diff = upper_bound_in_type (type, type);
      diff = diff - maxv;
    }

  return (wi::geu_p (diff, step_wi));
}
4379 /* Return false only when the induction variable BASE + STEP * I is
4380 known to not overflow: i.e. when the number of iterations is small
4381 enough with respect to the step and initial condition in order to
4382 keep the evolution confined in TYPEs bounds. Return true when the
4383 iv is known to overflow or when the property is not computable.
4385 USE_OVERFLOW_SEMANTICS is true if this function should assume that
4386 the rules for overflow of the given language apply (e.g., that signed
4387 arithmetics in C does not overflow).
   If VAR is an SSA name, this function also returns false if VAR can
   be proven not to overflow with value range info.  */
4392 bool
4393 scev_probably_wraps_p (tree var, tree base, tree step,
4394 gimple *at_stmt, struct loop *loop,
4395 bool use_overflow_semantics)
4397 /* FIXME: We really need something like
4398 http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
4400 We used to test for the following situation that frequently appears
4401 during address arithmetics:
4403 D.1621_13 = (long unsigned intD.4) D.1620_12;
4404 D.1622_14 = D.1621_13 * 8;
4405 D.1623_15 = (doubleD.29 *) D.1622_14;
4407 And derived that the sequence corresponding to D_14
4408 can be proved to not wrap because it is used for computing a
4409 memory access; however, this is not really the case -- for example,
4410 if D_12 = (unsigned char) [254,+,1], then D_14 has values
4411 2032, 2040, 0, 8, ..., but the code is still legal. */
4413 if (chrec_contains_undetermined (base)
4414 || chrec_contains_undetermined (step))
4415 return true;
4417 if (integer_zerop (step))
4418 return false;
4420 /* If we can use the fact that signed and pointer arithmetics does not
4421 wrap, we are done. */
4422 if (use_overflow_semantics && nowrap_type_p (TREE_TYPE (base)))
4423 return false;
4425 /* To be able to use estimates on number of iterations of the loop,
4426 we must have an upper bound on the absolute value of the step. */
4427 if (TREE_CODE (step) != INTEGER_CST)
4428 return true;
4430 /* Check if var can be proven not overflow with value range info. */
4431 if (var && TREE_CODE (var) == SSA_NAME
4432 && scev_var_range_cant_overflow (var, step, loop))
4433 return false;
4435 if (loop_exits_before_overflow (base, step, at_stmt, loop))
4436 return false;
4438 /* At this point we still don't have a proof that the iv does not
4439 overflow: give up. */
4440 return true;
4443 /* Frees the information on upper bounds on numbers of iterations of LOOP. */
4445 void
4446 free_numbers_of_iterations_estimates_loop (struct loop *loop)
4448 struct control_iv *civ;
4449 struct nb_iter_bound *bound;
4451 loop->nb_iterations = NULL;
4452 loop->estimate_state = EST_NOT_COMPUTED;
4453 for (bound = loop->bounds; bound;)
4455 struct nb_iter_bound *next = bound->next;
4456 ggc_free (bound);
4457 bound = next;
4459 loop->bounds = NULL;
4461 for (civ = loop->control_ivs; civ;)
4463 struct control_iv *next = civ->next;
4464 ggc_free (civ);
4465 civ = next;
4467 loop->control_ivs = NULL;
4470 /* Frees the information on upper bounds on numbers of iterations of loops. */
4472 void
4473 free_numbers_of_iterations_estimates (function *fn)
4475 struct loop *loop;
4477 FOR_EACH_LOOP_FN (fn, loop, 0)
4479 free_numbers_of_iterations_estimates_loop (loop);
4483 /* Substitute value VAL for ssa name NAME inside expressions held
4484 at LOOP. */
4486 void
4487 substitute_in_loop_info (struct loop *loop, tree name, tree val)
4489 loop->nb_iterations = simplify_replace_tree (loop->nb_iterations, name, val);