1 /* Functions to determine/estimate number of iterations of a loop.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
29 #include "gimple-pretty-print.h"
30 #include "diagnostic-core.h"
31 #include "stor-layout.h"
32 #include "fold-const.h"
36 #include "gimple-iterator.h"
38 #include "tree-ssa-loop-ivopts.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop.h"
42 #include "tree-chrec.h"
43 #include "tree-scalar-evolution.h"
47 /* The maximum number of dominator BBs we search for conditions
48 of loop header copies we use for simplifying a conditional
50 #define MAX_DOMINATORS_TO_WALK 8
54 Analysis of number of iterations of an affine exit test.
58 /* Bounds on some value, BELOW <= X <= UP. */
66 /* Splits expression EXPR to a variable part VAR and constant OFFSET. */
69 split_to_var_and_offset (tree expr
, tree
*var
, mpz_t offset
)
71 tree type
= TREE_TYPE (expr
);
76 mpz_set_ui (offset
, 0);
78 switch (TREE_CODE (expr
))
85 case POINTER_PLUS_EXPR
:
86 op0
= TREE_OPERAND (expr
, 0);
87 op1
= TREE_OPERAND (expr
, 1);
89 if (TREE_CODE (op1
) != INTEGER_CST
)
93 /* Always sign extend the offset. */
94 wi::to_mpz (op1
, offset
, SIGNED
);
96 mpz_neg (offset
, offset
);
100 *var
= build_int_cst_type (type
, 0);
101 wi::to_mpz (expr
, offset
, TYPE_SIGN (type
));
109 /* From condition C0 CMP C1 derives information regarding the value range
110 of VAR, which is of TYPE. Results are stored in to BELOW and UP. */
113 refine_value_range_using_guard (tree type
, tree var
,
114 tree c0
, enum tree_code cmp
, tree c1
,
115 mpz_t below
, mpz_t up
)
117 tree varc0
, varc1
, ctype
;
119 mpz_t mint
, maxt
, minc1
, maxc1
;
121 bool no_wrap
= nowrap_type_p (type
);
123 signop sgn
= TYPE_SIGN (type
);
131 STRIP_SIGN_NOPS (c0
);
132 STRIP_SIGN_NOPS (c1
);
133 ctype
= TREE_TYPE (c0
);
134 if (!useless_type_conversion_p (ctype
, type
))
140 /* We could derive quite precise information from EQ_EXPR, however,
141 such a guard is unlikely to appear, so we do not bother with
146 /* NE_EXPR comparisons do not contain much of useful information,
147 except for cases of comparing with bounds. */
148 if (TREE_CODE (c1
) != INTEGER_CST
149 || !INTEGRAL_TYPE_P (type
))
152 /* Ensure that the condition speaks about an expression in the same
154 ctype
= TREE_TYPE (c0
);
155 if (TYPE_PRECISION (ctype
) != TYPE_PRECISION (type
))
157 c0
= fold_convert (type
, c0
);
158 c1
= fold_convert (type
, c1
);
160 if (operand_equal_p (var
, c0
, 0))
164 /* Case of comparing VAR with its below/up bounds. */
166 wi::to_mpz (c1
, valc1
, TYPE_SIGN (type
));
167 if (mpz_cmp (valc1
, below
) == 0)
169 if (mpz_cmp (valc1
, up
) == 0)
176 /* Case of comparing with the bounds of the type. */
177 wide_int min
= wi::min_value (type
);
178 wide_int max
= wi::max_value (type
);
180 if (wi::eq_p (c1
, min
))
182 if (wi::eq_p (c1
, max
))
186 /* Quick return if no useful information. */
198 split_to_var_and_offset (expand_simple_operations (c0
), &varc0
, offc0
);
199 split_to_var_and_offset (expand_simple_operations (c1
), &varc1
, offc1
);
201 /* We are only interested in comparisons of expressions based on VAR. */
202 if (operand_equal_p (var
, varc1
, 0))
204 std::swap (varc0
, varc1
);
205 mpz_swap (offc0
, offc1
);
206 cmp
= swap_tree_comparison (cmp
);
208 else if (!operand_equal_p (var
, varc0
, 0))
217 get_type_static_bounds (type
, mint
, maxt
);
220 /* Setup range information for varc1. */
221 if (integer_zerop (varc1
))
223 wi::to_mpz (integer_zero_node
, minc1
, TYPE_SIGN (type
));
224 wi::to_mpz (integer_zero_node
, maxc1
, TYPE_SIGN (type
));
226 else if (TREE_CODE (varc1
) == SSA_NAME
227 && INTEGRAL_TYPE_P (type
)
228 && get_range_info (varc1
, &minv
, &maxv
) == VR_RANGE
)
230 gcc_assert (wi::le_p (minv
, maxv
, sgn
));
231 wi::to_mpz (minv
, minc1
, sgn
);
232 wi::to_mpz (maxv
, maxc1
, sgn
);
236 mpz_set (minc1
, mint
);
237 mpz_set (maxc1
, maxt
);
240 /* Compute valid range information for varc1 + offc1. Note nothing
241 useful can be derived if it overflows or underflows. Overflow or
242 underflow could happen when:
244 offc1 > 0 && varc1 + offc1 > MAX_VAL (type)
245 offc1 < 0 && varc1 + offc1 < MIN_VAL (type). */
246 mpz_add (minc1
, minc1
, offc1
);
247 mpz_add (maxc1
, maxc1
, offc1
);
249 || mpz_sgn (offc1
) == 0
250 || (mpz_sgn (offc1
) < 0 && mpz_cmp (minc1
, mint
) >= 0)
251 || (mpz_sgn (offc1
) > 0 && mpz_cmp (maxc1
, maxt
) <= 0));
255 if (mpz_cmp (minc1
, mint
) < 0)
256 mpz_set (minc1
, mint
);
257 if (mpz_cmp (maxc1
, maxt
) > 0)
258 mpz_set (maxc1
, maxt
);
263 mpz_sub_ui (maxc1
, maxc1
, 1);
268 mpz_add_ui (minc1
, minc1
, 1);
271 /* Compute range information for varc0. If there is no overflow,
272 the condition implied that
274 (varc0) cmp (varc1 + offc1 - offc0)
276 We can possibly improve the upper bound of varc0 if cmp is LE_EXPR,
277 or the below bound if cmp is GE_EXPR.
279 To prove there is no overflow/underflow, we need to check below
281 1) cmp == LE_EXPR && offc0 > 0
283 (varc0 + offc0) doesn't overflow
284 && (varc1 + offc1 - offc0) doesn't underflow
286 2) cmp == LE_EXPR && offc0 < 0
288 (varc0 + offc0) doesn't underflow
289 && (varc1 + offc1 - offc0) doesn't overflow
291 In this case, (varc0 + offc0) will never underflow if we can
292 prove (varc1 + offc1 - offc0) doesn't overflow.
294 3) cmp == GE_EXPR && offc0 < 0
296 (varc0 + offc0) doesn't underflow
297 && (varc1 + offc1 - offc0) doesn't overflow
299 4) cmp == GE_EXPR && offc0 > 0
301 (varc0 + offc0) doesn't overflow
302 && (varc1 + offc1 - offc0) doesn't underflow
304 In this case, (varc0 + offc0) will never overflow if we can
305 prove (varc1 + offc1 - offc0) doesn't underflow.
307 Note we only handle case 2 and 4 in below code. */
309 mpz_sub (minc1
, minc1
, offc0
);
310 mpz_sub (maxc1
, maxc1
, offc0
);
312 || mpz_sgn (offc0
) == 0
314 && mpz_sgn (offc0
) < 0 && mpz_cmp (maxc1
, maxt
) <= 0)
316 && mpz_sgn (offc0
) > 0 && mpz_cmp (minc1
, mint
) >= 0));
322 if (mpz_cmp (up
, maxc1
) > 0)
327 if (mpz_cmp (below
, minc1
) < 0)
328 mpz_set (below
, minc1
);
340 /* Stores estimate on the minimum/maximum value of the expression VAR + OFF
341 in TYPE to MIN and MAX. */
344 determine_value_range (struct loop
*loop
, tree type
, tree var
, mpz_t off
,
345 mpz_t min
, mpz_t max
)
351 enum value_range_type rtype
= VR_VARYING
;
353 /* If the expression is a constant, we know its value exactly. */
354 if (integer_zerop (var
))
361 get_type_static_bounds (type
, min
, max
);
363 /* See if we have some range info from VRP. */
364 if (TREE_CODE (var
) == SSA_NAME
&& INTEGRAL_TYPE_P (type
))
366 edge e
= loop_preheader_edge (loop
);
367 signop sgn
= TYPE_SIGN (type
);
370 /* Either for VAR itself... */
371 rtype
= get_range_info (var
, &minv
, &maxv
);
372 /* Or for PHI results in loop->header where VAR is used as
373 PHI argument from the loop preheader edge. */
374 for (gsi
= gsi_start_phis (loop
->header
); !gsi_end_p (gsi
); gsi_next (&gsi
))
376 gphi
*phi
= gsi
.phi ();
378 if (PHI_ARG_DEF_FROM_EDGE (phi
, e
) == var
379 && (get_range_info (gimple_phi_result (phi
), &minc
, &maxc
)
382 if (rtype
!= VR_RANGE
)
390 minv
= wi::max (minv
, minc
, sgn
);
391 maxv
= wi::min (maxv
, maxc
, sgn
);
392 /* If the PHI result range are inconsistent with
393 the VAR range, give up on looking at the PHI
394 results. This can happen if VR_UNDEFINED is
396 if (wi::gt_p (minv
, maxv
, sgn
))
398 rtype
= get_range_info (var
, &minv
, &maxv
);
406 if (rtype
!= VR_RANGE
)
413 gcc_assert (wi::le_p (minv
, maxv
, sgn
));
414 wi::to_mpz (minv
, minm
, sgn
);
415 wi::to_mpz (maxv
, maxm
, sgn
);
417 /* Now walk the dominators of the loop header and use the entry
418 guards to refine the estimates. */
419 for (bb
= loop
->header
;
420 bb
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
) && cnt
< MAX_DOMINATORS_TO_WALK
;
421 bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
))
428 if (!single_pred_p (bb
))
430 e
= single_pred_edge (bb
);
432 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
435 cond
= last_stmt (e
->src
);
436 c0
= gimple_cond_lhs (cond
);
437 cmp
= gimple_cond_code (cond
);
438 c1
= gimple_cond_rhs (cond
);
440 if (e
->flags
& EDGE_FALSE_VALUE
)
441 cmp
= invert_tree_comparison (cmp
, false);
443 refine_value_range_using_guard (type
, var
, c0
, cmp
, c1
, minm
, maxm
);
447 mpz_add (minm
, minm
, off
);
448 mpz_add (maxm
, maxm
, off
);
449 /* If the computation may not wrap or off is zero, then this
450 is always fine. If off is negative and minv + off isn't
451 smaller than type's minimum, or off is positive and
452 maxv + off isn't bigger than type's maximum, use the more
453 precise range too. */
454 if (nowrap_type_p (type
)
455 || mpz_sgn (off
) == 0
456 || (mpz_sgn (off
) < 0 && mpz_cmp (minm
, min
) >= 0)
457 || (mpz_sgn (off
) > 0 && mpz_cmp (maxm
, max
) <= 0))
469 /* If the computation may wrap, we know nothing about the value, except for
470 the range of the type. */
471 if (!nowrap_type_p (type
))
474 /* Since the addition of OFF does not wrap, if OFF is positive, then we may
475 add it to MIN, otherwise to MAX. */
476 if (mpz_sgn (off
) < 0)
477 mpz_add (max
, max
, off
);
479 mpz_add (min
, min
, off
);
482 /* Stores the bounds on the difference of the values of the expressions
483 (var + X) and (var + Y), computed in TYPE, to BNDS. */
486 bound_difference_of_offsetted_base (tree type
, mpz_t x
, mpz_t y
,
489 int rel
= mpz_cmp (x
, y
);
490 bool may_wrap
= !nowrap_type_p (type
);
493 /* If X == Y, then the expressions are always equal.
494 If X > Y, there are the following possibilities:
495 a) neither of var + X and var + Y overflow or underflow, or both of
496 them do. Then their difference is X - Y.
497 b) var + X overflows, and var + Y does not. Then the values of the
498 expressions are var + X - M and var + Y, where M is the range of
499 the type, and their difference is X - Y - M.
500 c) var + Y underflows and var + X does not. Their difference again
502 Therefore, if the arithmetics in type does not overflow, then the
503 bounds are (X - Y, X - Y), otherwise they are (X - Y - M, X - Y)
504 Similarly, if X < Y, the bounds are either (X - Y, X - Y) or
505 (X - Y, X - Y + M). */
509 mpz_set_ui (bnds
->below
, 0);
510 mpz_set_ui (bnds
->up
, 0);
515 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type
)), m
, UNSIGNED
);
516 mpz_add_ui (m
, m
, 1);
517 mpz_sub (bnds
->up
, x
, y
);
518 mpz_set (bnds
->below
, bnds
->up
);
523 mpz_sub (bnds
->below
, bnds
->below
, m
);
525 mpz_add (bnds
->up
, bnds
->up
, m
);
531 /* From condition C0 CMP C1 derives information regarding the
532 difference of values of VARX + OFFX and VARY + OFFY, computed in TYPE,
533 and stores it to BNDS. */
536 refine_bounds_using_guard (tree type
, tree varx
, mpz_t offx
,
537 tree vary
, mpz_t offy
,
538 tree c0
, enum tree_code cmp
, tree c1
,
541 tree varc0
, varc1
, ctype
;
542 mpz_t offc0
, offc1
, loffx
, loffy
, bnd
;
544 bool no_wrap
= nowrap_type_p (type
);
553 STRIP_SIGN_NOPS (c0
);
554 STRIP_SIGN_NOPS (c1
);
555 ctype
= TREE_TYPE (c0
);
556 if (!useless_type_conversion_p (ctype
, type
))
562 /* We could derive quite precise information from EQ_EXPR, however, such
563 a guard is unlikely to appear, so we do not bother with handling
568 /* NE_EXPR comparisons do not contain much of useful information, except for
569 special case of comparing with the bounds of the type. */
570 if (TREE_CODE (c1
) != INTEGER_CST
571 || !INTEGRAL_TYPE_P (type
))
574 /* Ensure that the condition speaks about an expression in the same type
576 ctype
= TREE_TYPE (c0
);
577 if (TYPE_PRECISION (ctype
) != TYPE_PRECISION (type
))
579 c0
= fold_convert (type
, c0
);
580 c1
= fold_convert (type
, c1
);
582 if (TYPE_MIN_VALUE (type
)
583 && operand_equal_p (c1
, TYPE_MIN_VALUE (type
), 0))
588 if (TYPE_MAX_VALUE (type
)
589 && operand_equal_p (c1
, TYPE_MAX_VALUE (type
), 0))
602 split_to_var_and_offset (expand_simple_operations (c0
), &varc0
, offc0
);
603 split_to_var_and_offset (expand_simple_operations (c1
), &varc1
, offc1
);
605 /* We are only interested in comparisons of expressions based on VARX and
606 VARY. TODO -- we might also be able to derive some bounds from
607 expressions containing just one of the variables. */
609 if (operand_equal_p (varx
, varc1
, 0))
611 std::swap (varc0
, varc1
);
612 mpz_swap (offc0
, offc1
);
613 cmp
= swap_tree_comparison (cmp
);
616 if (!operand_equal_p (varx
, varc0
, 0)
617 || !operand_equal_p (vary
, varc1
, 0))
620 mpz_init_set (loffx
, offx
);
621 mpz_init_set (loffy
, offy
);
623 if (cmp
== GT_EXPR
|| cmp
== GE_EXPR
)
625 std::swap (varx
, vary
);
626 mpz_swap (offc0
, offc1
);
627 mpz_swap (loffx
, loffy
);
628 cmp
= swap_tree_comparison (cmp
);
632 /* If there is no overflow, the condition implies that
634 (VARX + OFFX) cmp (VARY + OFFY) + (OFFX - OFFY + OFFC1 - OFFC0).
636 The overflows and underflows may complicate things a bit; each
637 overflow decreases the appropriate offset by M, and underflow
638 increases it by M. The above inequality would not necessarily be
641 -- VARX + OFFX underflows and VARX + OFFC0 does not, or
642 VARX + OFFC0 overflows, but VARX + OFFX does not.
643 This may only happen if OFFX < OFFC0.
644 -- VARY + OFFY overflows and VARY + OFFC1 does not, or
645 VARY + OFFC1 underflows and VARY + OFFY does not.
646 This may only happen if OFFY > OFFC1. */
655 x_ok
= (integer_zerop (varx
)
656 || mpz_cmp (loffx
, offc0
) >= 0);
657 y_ok
= (integer_zerop (vary
)
658 || mpz_cmp (loffy
, offc1
) <= 0);
664 mpz_sub (bnd
, loffx
, loffy
);
665 mpz_add (bnd
, bnd
, offc1
);
666 mpz_sub (bnd
, bnd
, offc0
);
669 mpz_sub_ui (bnd
, bnd
, 1);
674 if (mpz_cmp (bnds
->below
, bnd
) < 0)
675 mpz_set (bnds
->below
, bnd
);
679 if (mpz_cmp (bnd
, bnds
->up
) < 0)
680 mpz_set (bnds
->up
, bnd
);
692 /* Stores the bounds on the value of the expression X - Y in LOOP to BNDS.
693 The subtraction is considered to be performed in arbitrary precision,
696 We do not attempt to be too clever regarding the value ranges of X and
697 Y; most of the time, they are just integers or ssa names offsetted by
698 integer. However, we try to use the information contained in the
699 comparisons before the loop (usually created by loop header copying). */
702 bound_difference (struct loop
*loop
, tree x
, tree y
, bounds
*bnds
)
704 tree type
= TREE_TYPE (x
);
707 mpz_t minx
, maxx
, miny
, maxy
;
715 /* Get rid of unnecessary casts, but preserve the value of
720 mpz_init (bnds
->below
);
724 split_to_var_and_offset (x
, &varx
, offx
);
725 split_to_var_and_offset (y
, &vary
, offy
);
727 if (!integer_zerop (varx
)
728 && operand_equal_p (varx
, vary
, 0))
730 /* Special case VARX == VARY -- we just need to compare the
731 offsets. The matters are a bit more complicated in the
732 case addition of offsets may wrap. */
733 bound_difference_of_offsetted_base (type
, offx
, offy
, bnds
);
737 /* Otherwise, use the value ranges to determine the initial
738 estimates on below and up. */
743 determine_value_range (loop
, type
, varx
, offx
, minx
, maxx
);
744 determine_value_range (loop
, type
, vary
, offy
, miny
, maxy
);
746 mpz_sub (bnds
->below
, minx
, maxy
);
747 mpz_sub (bnds
->up
, maxx
, miny
);
754 /* If both X and Y are constants, we cannot get any more precise. */
755 if (integer_zerop (varx
) && integer_zerop (vary
))
758 /* Now walk the dominators of the loop header and use the entry
759 guards to refine the estimates. */
760 for (bb
= loop
->header
;
761 bb
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
) && cnt
< MAX_DOMINATORS_TO_WALK
;
762 bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
))
764 if (!single_pred_p (bb
))
766 e
= single_pred_edge (bb
);
768 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
771 cond
= last_stmt (e
->src
);
772 c0
= gimple_cond_lhs (cond
);
773 cmp
= gimple_cond_code (cond
);
774 c1
= gimple_cond_rhs (cond
);
776 if (e
->flags
& EDGE_FALSE_VALUE
)
777 cmp
= invert_tree_comparison (cmp
, false);
779 refine_bounds_using_guard (type
, varx
, offx
, vary
, offy
,
789 /* Update the bounds in BNDS that restrict the value of X to the bounds
790 that restrict the value of X + DELTA. X can be obtained as a
791 difference of two values in TYPE. */
794 bounds_add (bounds
*bnds
, const widest_int
&delta
, tree type
)
799 wi::to_mpz (delta
, mdelta
, SIGNED
);
802 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type
)), max
, UNSIGNED
);
804 mpz_add (bnds
->up
, bnds
->up
, mdelta
);
805 mpz_add (bnds
->below
, bnds
->below
, mdelta
);
807 if (mpz_cmp (bnds
->up
, max
) > 0)
808 mpz_set (bnds
->up
, max
);
811 if (mpz_cmp (bnds
->below
, max
) < 0)
812 mpz_set (bnds
->below
, max
);
818 /* Update the bounds in BNDS that restrict the value of X to the bounds
819 that restrict the value of -X. */
822 bounds_negate (bounds
*bnds
)
826 mpz_init_set (tmp
, bnds
->up
);
827 mpz_neg (bnds
->up
, bnds
->below
);
828 mpz_neg (bnds
->below
, tmp
);
832 /* Returns inverse of X modulo 2^s, where MASK = 2^s-1. */
835 inverse (tree x
, tree mask
)
837 tree type
= TREE_TYPE (x
);
839 unsigned ctr
= tree_floor_log2 (mask
);
841 if (TYPE_PRECISION (type
) <= HOST_BITS_PER_WIDE_INT
)
843 unsigned HOST_WIDE_INT ix
;
844 unsigned HOST_WIDE_INT imask
;
845 unsigned HOST_WIDE_INT irslt
= 1;
847 gcc_assert (cst_and_fits_in_hwi (x
));
848 gcc_assert (cst_and_fits_in_hwi (mask
));
850 ix
= int_cst_value (x
);
851 imask
= int_cst_value (mask
);
860 rslt
= build_int_cst_type (type
, irslt
);
864 rslt
= build_int_cst (type
, 1);
867 rslt
= int_const_binop (MULT_EXPR
, rslt
, x
);
868 x
= int_const_binop (MULT_EXPR
, x
, x
);
870 rslt
= int_const_binop (BIT_AND_EXPR
, rslt
, mask
);
876 /* Derives the upper bound BND on the number of executions of loop with exit
877 condition S * i <> C. If NO_OVERFLOW is true, then the control variable of
878 the loop does not overflow. EXIT_MUST_BE_TAKEN is true if we are guaranteed
879 that the loop ends through this exit, i.e., the induction variable ever
880 reaches the value of C.
882 The value C is equal to final - base, where final and base are the final and
883 initial value of the actual induction variable in the analysed loop. BNDS
884 bounds the value of this difference when computed in signed type with
885 unbounded range, while the computation of C is performed in an unsigned
886 type with the range matching the range of the type of the induction variable.
887 In particular, BNDS.up contains an upper bound on C in the following cases:
888 -- if the iv must reach its final value without overflow, i.e., if
889 NO_OVERFLOW && EXIT_MUST_BE_TAKEN is true, or
890 -- if final >= base, which we know to hold when BNDS.below >= 0. */
893 number_of_iterations_ne_max (mpz_t bnd
, bool no_overflow
, tree c
, tree s
,
894 bounds
*bnds
, bool exit_must_be_taken
)
898 tree type
= TREE_TYPE (c
);
899 bool bnds_u_valid
= ((no_overflow
&& exit_must_be_taken
)
900 || mpz_sgn (bnds
->below
) >= 0);
903 || (TREE_CODE (c
) == INTEGER_CST
904 && TREE_CODE (s
) == INTEGER_CST
905 && wi::mod_trunc (c
, s
, TYPE_SIGN (type
)) == 0)
906 || (TYPE_OVERFLOW_UNDEFINED (type
)
907 && multiple_of_p (type
, c
, s
)))
909 /* If C is an exact multiple of S, then its value will be reached before
910 the induction variable overflows (unless the loop is exited in some
911 other way before). Note that the actual induction variable in the
912 loop (which ranges from base to final instead of from 0 to C) may
913 overflow, in which case BNDS.up will not be giving a correct upper
914 bound on C; thus, BNDS_U_VALID had to be computed in advance. */
916 exit_must_be_taken
= true;
919 /* If the induction variable can overflow, the number of iterations is at
920 most the period of the control variable (or infinite, but in that case
921 the whole # of iterations analysis will fail). */
924 max
= wi::mask
<widest_int
> (TYPE_PRECISION (type
) - wi::ctz (s
), false);
925 wi::to_mpz (max
, bnd
, UNSIGNED
);
929 /* Now we know that the induction variable does not overflow, so the loop
930 iterates at most (range of type / S) times. */
931 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type
)), bnd
, UNSIGNED
);
933 /* If the induction variable is guaranteed to reach the value of C before
935 if (exit_must_be_taken
)
937 /* ... then we can strengthen this to C / S, and possibly we can use
938 the upper bound on C given by BNDS. */
939 if (TREE_CODE (c
) == INTEGER_CST
)
940 wi::to_mpz (c
, bnd
, UNSIGNED
);
941 else if (bnds_u_valid
)
942 mpz_set (bnd
, bnds
->up
);
946 wi::to_mpz (s
, d
, UNSIGNED
);
947 mpz_fdiv_q (bnd
, bnd
, d
);
951 /* Determines number of iterations of loop whose ending condition
952 is IV <> FINAL. TYPE is the type of the iv. The number of
953 iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
954 we know that the exit must be taken eventually, i.e., that the IV
955 ever reaches the value FINAL (we derived this earlier, and possibly set
956 NITER->assumptions to make sure this is the case). BNDS contains the
957 bounds on the difference FINAL - IV->base. */
960 number_of_iterations_ne (struct loop
*loop
, tree type
, affine_iv
*iv
,
961 tree final
, struct tree_niter_desc
*niter
,
962 bool exit_must_be_taken
, bounds
*bnds
)
964 tree niter_type
= unsigned_type_for (type
);
965 tree s
, c
, d
, bits
, assumption
, tmp
, bound
;
968 niter
->control
= *iv
;
969 niter
->bound
= final
;
970 niter
->cmp
= NE_EXPR
;
972 /* Rearrange the terms so that we get inequality S * i <> C, with S
973 positive. Also cast everything to the unsigned type. If IV does
974 not overflow, BNDS bounds the value of C. Also, this is the
975 case if the computation |FINAL - IV->base| does not overflow, i.e.,
976 if BNDS->below in the result is nonnegative. */
977 if (tree_int_cst_sign_bit (iv
->step
))
979 s
= fold_convert (niter_type
,
980 fold_build1 (NEGATE_EXPR
, type
, iv
->step
));
981 c
= fold_build2 (MINUS_EXPR
, niter_type
,
982 fold_convert (niter_type
, iv
->base
),
983 fold_convert (niter_type
, final
));
984 bounds_negate (bnds
);
988 s
= fold_convert (niter_type
, iv
->step
);
989 c
= fold_build2 (MINUS_EXPR
, niter_type
,
990 fold_convert (niter_type
, final
),
991 fold_convert (niter_type
, iv
->base
));
995 number_of_iterations_ne_max (max
, iv
->no_overflow
, c
, s
, bnds
,
997 niter
->max
= widest_int::from (wi::from_mpz (niter_type
, max
, false),
998 TYPE_SIGN (niter_type
));
1001 /* Compute no-overflow information for the control iv. This can be
1002 proven when below two conditions are satisfied:
1004 1) IV evaluates toward FINAL at beginning, i.e:
1005 base <= FINAL ; step > 0
1006 base >= FINAL ; step < 0
1008 2) |FINAL - base| is an exact multiple of step.
1010 Unfortunately, it's hard to prove above conditions after pass loop-ch
1011 because loop with exit condition (IV != FINAL) usually will be guarded
1012 by initial-condition (IV.base - IV.step != FINAL). In this case, we
1013 can alternatively try to prove below conditions:
1015 1') IV evaluates toward FINAL at beginning, i.e:
1016 new_base = base - step < FINAL ; step > 0
1017 && base - step doesn't underflow
1018 new_base = base - step > FINAL ; step < 0
1019 && base - step doesn't overflow
1021 2') |FINAL - new_base| is an exact multiple of step.
1023 Please refer to PR34114 as an example of loop-ch's impact, also refer
1024 to PR72817 as an example why condition 2') is necessary.
1026 Note, for NE_EXPR, base equals to FINAL is a special case, in
1027 which the loop exits immediately, and the iv does not overflow. */
1028 if (!niter
->control
.no_overflow
1029 && (integer_onep (s
) || multiple_of_p (type
, c
, s
)))
1031 tree t
, cond
, new_c
, relaxed_cond
= boolean_false_node
;
1033 if (tree_int_cst_sign_bit (iv
->step
))
1035 cond
= fold_build2 (GE_EXPR
, boolean_type_node
, iv
->base
, final
);
1036 if (TREE_CODE (type
) == INTEGER_TYPE
)
1038 /* Only when base - step doesn't overflow. */
1039 t
= TYPE_MAX_VALUE (type
);
1040 t
= fold_build2 (PLUS_EXPR
, type
, t
, iv
->step
);
1041 t
= fold_build2 (GE_EXPR
, boolean_type_node
, t
, iv
->base
);
1042 if (integer_nonzerop (t
))
1044 t
= fold_build2 (MINUS_EXPR
, type
, iv
->base
, iv
->step
);
1045 new_c
= fold_build2 (MINUS_EXPR
, niter_type
,
1046 fold_convert (niter_type
, t
),
1047 fold_convert (niter_type
, final
));
1048 if (multiple_of_p (type
, new_c
, s
))
1049 relaxed_cond
= fold_build2 (GT_EXPR
, boolean_type_node
,
1056 cond
= fold_build2 (LE_EXPR
, boolean_type_node
, iv
->base
, final
);
1057 if (TREE_CODE (type
) == INTEGER_TYPE
)
1059 /* Only when base - step doesn't underflow. */
1060 t
= TYPE_MIN_VALUE (type
);
1061 t
= fold_build2 (PLUS_EXPR
, type
, t
, iv
->step
);
1062 t
= fold_build2 (LE_EXPR
, boolean_type_node
, t
, iv
->base
);
1063 if (integer_nonzerop (t
))
1065 t
= fold_build2 (MINUS_EXPR
, type
, iv
->base
, iv
->step
);
1066 new_c
= fold_build2 (MINUS_EXPR
, niter_type
,
1067 fold_convert (niter_type
, final
),
1068 fold_convert (niter_type
, t
));
1069 if (multiple_of_p (type
, new_c
, s
))
1070 relaxed_cond
= fold_build2 (LT_EXPR
, boolean_type_node
,
1076 t
= simplify_using_initial_conditions (loop
, cond
);
1077 if (!t
|| !integer_onep (t
))
1078 t
= simplify_using_initial_conditions (loop
, relaxed_cond
);
1080 if (t
&& integer_onep (t
))
1081 niter
->control
.no_overflow
= true;
1084 /* First the trivial cases -- when the step is 1. */
1085 if (integer_onep (s
))
1090 if (niter
->control
.no_overflow
&& multiple_of_p (type
, c
, s
))
1092 niter
->niter
= fold_build2 (FLOOR_DIV_EXPR
, niter_type
, c
, s
);
1096 /* Let nsd (step, size of mode) = d. If d does not divide c, the loop
1097 is infinite. Otherwise, the number of iterations is
1098 (inverse(s/d) * (c/d)) mod (size of mode/d). */
1099 bits
= num_ending_zeros (s
);
1100 bound
= build_low_bits_mask (niter_type
,
1101 (TYPE_PRECISION (niter_type
)
1102 - tree_to_uhwi (bits
)));
1104 d
= fold_binary_to_constant (LSHIFT_EXPR
, niter_type
,
1105 build_int_cst (niter_type
, 1), bits
);
1106 s
= fold_binary_to_constant (RSHIFT_EXPR
, niter_type
, s
, bits
);
1108 if (!exit_must_be_taken
)
1110 /* If we cannot assume that the exit is taken eventually, record the
1111 assumptions for divisibility of c. */
1112 assumption
= fold_build2 (FLOOR_MOD_EXPR
, niter_type
, c
, d
);
1113 assumption
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1114 assumption
, build_int_cst (niter_type
, 0));
1115 if (!integer_nonzerop (assumption
))
1116 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1117 niter
->assumptions
, assumption
);
1120 c
= fold_build2 (EXACT_DIV_EXPR
, niter_type
, c
, d
);
1121 tmp
= fold_build2 (MULT_EXPR
, niter_type
, c
, inverse (s
, bound
));
1122 niter
->niter
= fold_build2 (BIT_AND_EXPR
, niter_type
, tmp
, bound
);
1126 /* Checks whether we can determine the final value of the control variable
1127 of the loop with ending condition IV0 < IV1 (computed in TYPE).
1128 DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
1129 of the step. The assumptions necessary to ensure that the computation
1130 of the final value does not overflow are recorded in NITER. If we
1131 find the final value, we adjust DELTA and return TRUE. Otherwise
1132 we return false. BNDS bounds the value of IV1->base - IV0->base,
1133 and will be updated by the same amount as DELTA. EXIT_MUST_BE_TAKEN is
1134 true if we know that the exit must be taken eventually. */
1137 number_of_iterations_lt_to_ne (tree type
, affine_iv
*iv0
, affine_iv
*iv1
,
1138 struct tree_niter_desc
*niter
,
1139 tree
*delta
, tree step
,
1140 bool exit_must_be_taken
, bounds
*bnds
)
1142 tree niter_type
= TREE_TYPE (step
);
1143 tree mod
= fold_build2 (FLOOR_MOD_EXPR
, niter_type
, *delta
, step
);
1146 tree assumption
= boolean_true_node
, bound
, noloop
;
1147 bool ret
= false, fv_comp_no_overflow
;
1149 if (POINTER_TYPE_P (type
))
1152 if (TREE_CODE (mod
) != INTEGER_CST
)
1154 if (integer_nonzerop (mod
))
1155 mod
= fold_build2 (MINUS_EXPR
, niter_type
, step
, mod
);
1156 tmod
= fold_convert (type1
, mod
);
1159 wi::to_mpz (mod
, mmod
, UNSIGNED
);
1160 mpz_neg (mmod
, mmod
);
1162 /* If the induction variable does not overflow and the exit is taken,
1163 then the computation of the final value does not overflow. This is
1164 also obviously the case if the new final value is equal to the
1165 current one. Finally, we postulate this for pointer type variables,
1166 as the code cannot rely on the object to which the pointer points being
1167 placed at the end of the address space (and more pragmatically,
1168 TYPE_{MIN,MAX}_VALUE is not defined for pointers). */
1169 if (integer_zerop (mod
) || POINTER_TYPE_P (type
))
1170 fv_comp_no_overflow
= true;
1171 else if (!exit_must_be_taken
)
1172 fv_comp_no_overflow
= false;
1174 fv_comp_no_overflow
=
1175 (iv0
->no_overflow
&& integer_nonzerop (iv0
->step
))
1176 || (iv1
->no_overflow
&& integer_nonzerop (iv1
->step
));
1178 if (integer_nonzerop (iv0
->step
))
1180 /* The final value of the iv is iv1->base + MOD, assuming that this
1181 computation does not overflow, and that
1182 iv0->base <= iv1->base + MOD. */
1183 if (!fv_comp_no_overflow
)
1185 bound
= fold_build2 (MINUS_EXPR
, type1
,
1186 TYPE_MAX_VALUE (type1
), tmod
);
1187 assumption
= fold_build2 (LE_EXPR
, boolean_type_node
,
1189 if (integer_zerop (assumption
))
1192 if (mpz_cmp (mmod
, bnds
->below
) < 0)
1193 noloop
= boolean_false_node
;
1194 else if (POINTER_TYPE_P (type
))
1195 noloop
= fold_build2 (GT_EXPR
, boolean_type_node
,
1197 fold_build_pointer_plus (iv1
->base
, tmod
));
1199 noloop
= fold_build2 (GT_EXPR
, boolean_type_node
,
1201 fold_build2 (PLUS_EXPR
, type1
,
1206 /* The final value of the iv is iv0->base - MOD, assuming that this
1207 computation does not overflow, and that
1208 iv0->base - MOD <= iv1->base. */
1209 if (!fv_comp_no_overflow
)
1211 bound
= fold_build2 (PLUS_EXPR
, type1
,
1212 TYPE_MIN_VALUE (type1
), tmod
);
1213 assumption
= fold_build2 (GE_EXPR
, boolean_type_node
,
1215 if (integer_zerop (assumption
))
1218 if (mpz_cmp (mmod
, bnds
->below
) < 0)
1219 noloop
= boolean_false_node
;
1220 else if (POINTER_TYPE_P (type
))
1221 noloop
= fold_build2 (GT_EXPR
, boolean_type_node
,
1222 fold_build_pointer_plus (iv0
->base
,
1223 fold_build1 (NEGATE_EXPR
,
1227 noloop
= fold_build2 (GT_EXPR
, boolean_type_node
,
1228 fold_build2 (MINUS_EXPR
, type1
,
1233 if (!integer_nonzerop (assumption
))
1234 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1237 if (!integer_zerop (noloop
))
1238 niter
->may_be_zero
= fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1241 bounds_add (bnds
, wi::to_widest (mod
), type
);
1242 *delta
= fold_build2 (PLUS_EXPR
, niter_type
, *delta
, mod
);
1250 /* Add assertions to NITER that ensure that the control variable of the loop
1251 with ending condition IV0 < IV1 does not overflow. Types of IV0 and IV1
1252 are TYPE. Returns false if we can prove that there is an overflow, true
1253 otherwise. STEP is the absolute value of the step. */
1256 assert_no_overflow_lt (tree type
, affine_iv
*iv0
, affine_iv
*iv1
,
1257 struct tree_niter_desc
*niter
, tree step
)
1259 tree bound
, d
, assumption
, diff
;
1260 tree niter_type
= TREE_TYPE (step
);
1262 if (integer_nonzerop (iv0
->step
))
1264 /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
1265 if (iv0
->no_overflow
)
1268 /* If iv0->base is a constant, we can determine the last value before
1269 overflow precisely; otherwise we conservatively assume
1272 if (TREE_CODE (iv0
->base
) == INTEGER_CST
)
1274 d
= fold_build2 (MINUS_EXPR
, niter_type
,
1275 fold_convert (niter_type
, TYPE_MAX_VALUE (type
)),
1276 fold_convert (niter_type
, iv0
->base
));
1277 diff
= fold_build2 (FLOOR_MOD_EXPR
, niter_type
, d
, step
);
1280 diff
= fold_build2 (MINUS_EXPR
, niter_type
, step
,
1281 build_int_cst (niter_type
, 1));
1282 bound
= fold_build2 (MINUS_EXPR
, type
,
1283 TYPE_MAX_VALUE (type
), fold_convert (type
, diff
));
1284 assumption
= fold_build2 (LE_EXPR
, boolean_type_node
,
1289 /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
1290 if (iv1
->no_overflow
)
1293 if (TREE_CODE (iv1
->base
) == INTEGER_CST
)
1295 d
= fold_build2 (MINUS_EXPR
, niter_type
,
1296 fold_convert (niter_type
, iv1
->base
),
1297 fold_convert (niter_type
, TYPE_MIN_VALUE (type
)));
1298 diff
= fold_build2 (FLOOR_MOD_EXPR
, niter_type
, d
, step
);
1301 diff
= fold_build2 (MINUS_EXPR
, niter_type
, step
,
1302 build_int_cst (niter_type
, 1));
1303 bound
= fold_build2 (PLUS_EXPR
, type
,
1304 TYPE_MIN_VALUE (type
), fold_convert (type
, diff
));
1305 assumption
= fold_build2 (GE_EXPR
, boolean_type_node
,
1309 if (integer_zerop (assumption
))
1311 if (!integer_nonzerop (assumption
))
1312 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1313 niter
->assumptions
, assumption
);
1315 iv0
->no_overflow
= true;
1316 iv1
->no_overflow
= true;
1320 /* Add an assumption to NITER that a loop whose ending condition
1321 is IV0 < IV1 rolls. TYPE is the type of the control iv. BNDS
1322 bounds the value of IV1->base - IV0->base. */
1325 assert_loop_rolls_lt (tree type
, affine_iv
*iv0
, affine_iv
*iv1
,
1326 struct tree_niter_desc
*niter
, bounds
*bnds
)
1328 tree assumption
= boolean_true_node
, bound
, diff
;
1329 tree mbz
, mbzl
, mbzr
, type1
;
1330 bool rolls_p
, no_overflow_p
;
1334 /* We are going to compute the number of iterations as
1335 (iv1->base - iv0->base + step - 1) / step, computed in the unsigned
1336 variant of TYPE. This formula only works if
1338 -step + 1 <= (iv1->base - iv0->base) <= MAX - step + 1
1340 (where MAX is the maximum value of the unsigned variant of TYPE, and
1341 the computations in this formula are performed in full precision,
1342 i.e., without overflows).
1344 Usually, for loops with exit condition iv0->base + step * i < iv1->base,
1345 we have a condition of the form iv0->base - step < iv1->base before the loop,
1346 and for loops iv0->base < iv1->base - step * i the condition
1347 iv0->base < iv1->base + step, due to loop header copying, which enable us
1348 to prove the lower bound.
1350 The upper bound is more complicated. Unless the expressions for initial
1351 and final value themselves contain enough information, we usually cannot
1352 derive it from the context. */
1354 /* First check whether the answer does not follow from the bounds we gathered
1356 if (integer_nonzerop (iv0
->step
))
1357 dstep
= wi::to_widest (iv0
->step
);
1360 dstep
= wi::sext (wi::to_widest (iv1
->step
), TYPE_PRECISION (type
));
1365 wi::to_mpz (dstep
, mstep
, UNSIGNED
);
1366 mpz_neg (mstep
, mstep
);
1367 mpz_add_ui (mstep
, mstep
, 1);
1369 rolls_p
= mpz_cmp (mstep
, bnds
->below
) <= 0;
1372 wi::to_mpz (wi::minus_one (TYPE_PRECISION (type
)), max
, UNSIGNED
);
1373 mpz_add (max
, max
, mstep
);
1374 no_overflow_p
= (mpz_cmp (bnds
->up
, max
) <= 0
1375 /* For pointers, only values lying inside a single object
1376 can be compared or manipulated by pointer arithmetics.
1377 Gcc in general does not allow or handle objects larger
1378 than half of the address space, hence the upper bound
1379 is satisfied for pointers. */
1380 || POINTER_TYPE_P (type
));
1384 if (rolls_p
&& no_overflow_p
)
1388 if (POINTER_TYPE_P (type
))
1391 /* Now the hard part; we must formulate the assumption(s) as expressions, and
1392 we must be careful not to introduce overflow. */
1394 if (integer_nonzerop (iv0
->step
))
1396 diff
= fold_build2 (MINUS_EXPR
, type1
,
1397 iv0
->step
, build_int_cst (type1
, 1));
1399 /* We need to know that iv0->base >= MIN + iv0->step - 1. Since
1400 0 address never belongs to any object, we can assume this for
1402 if (!POINTER_TYPE_P (type
))
1404 bound
= fold_build2 (PLUS_EXPR
, type1
,
1405 TYPE_MIN_VALUE (type
), diff
);
1406 assumption
= fold_build2 (GE_EXPR
, boolean_type_node
,
1410 /* And then we can compute iv0->base - diff, and compare it with
1412 mbzl
= fold_build2 (MINUS_EXPR
, type1
,
1413 fold_convert (type1
, iv0
->base
), diff
);
1414 mbzr
= fold_convert (type1
, iv1
->base
);
1418 diff
= fold_build2 (PLUS_EXPR
, type1
,
1419 iv1
->step
, build_int_cst (type1
, 1));
1421 if (!POINTER_TYPE_P (type
))
1423 bound
= fold_build2 (PLUS_EXPR
, type1
,
1424 TYPE_MAX_VALUE (type
), diff
);
1425 assumption
= fold_build2 (LE_EXPR
, boolean_type_node
,
1429 mbzl
= fold_convert (type1
, iv0
->base
);
1430 mbzr
= fold_build2 (MINUS_EXPR
, type1
,
1431 fold_convert (type1
, iv1
->base
), diff
);
1434 if (!integer_nonzerop (assumption
))
1435 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1436 niter
->assumptions
, assumption
);
1439 mbz
= fold_build2 (GT_EXPR
, boolean_type_node
, mbzl
, mbzr
);
1440 niter
->may_be_zero
= fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1441 niter
->may_be_zero
, mbz
);
1445 /* Determines number of iterations of loop whose ending condition
1446 is IV0 < IV1. TYPE is the type of the iv. The number of
1447 iterations is stored to NITER. BNDS bounds the difference
1448 IV1->base - IV0->base. EXIT_MUST_BE_TAKEN is true if we know
1449 that the exit must be taken eventually. */
1452 number_of_iterations_lt (struct loop
*loop
, tree type
, affine_iv
*iv0
,
1453 affine_iv
*iv1
, struct tree_niter_desc
*niter
,
1454 bool exit_must_be_taken
, bounds
*bnds
)
1456 tree niter_type
= unsigned_type_for (type
);
1457 tree delta
, step
, s
;
1460 if (integer_nonzerop (iv0
->step
))
1462 niter
->control
= *iv0
;
1463 niter
->cmp
= LT_EXPR
;
1464 niter
->bound
= iv1
->base
;
1468 niter
->control
= *iv1
;
1469 niter
->cmp
= GT_EXPR
;
1470 niter
->bound
= iv0
->base
;
1473 delta
= fold_build2 (MINUS_EXPR
, niter_type
,
1474 fold_convert (niter_type
, iv1
->base
),
1475 fold_convert (niter_type
, iv0
->base
));
1477 /* First handle the special case that the step is +-1. */
1478 if ((integer_onep (iv0
->step
) && integer_zerop (iv1
->step
))
1479 || (integer_all_onesp (iv1
->step
) && integer_zerop (iv0
->step
)))
1481 /* for (i = iv0->base; i < iv1->base; i++)
1485 for (i = iv1->base; i > iv0->base; i--).
1487 In both cases # of iterations is iv1->base - iv0->base, assuming that
1488 iv1->base >= iv0->base.
1490 First try to derive a lower bound on the value of
1491 iv1->base - iv0->base, computed in full precision. If the difference
1492 is nonnegative, we are done, otherwise we must record the
1495 if (mpz_sgn (bnds
->below
) < 0)
1496 niter
->may_be_zero
= fold_build2 (LT_EXPR
, boolean_type_node
,
1497 iv1
->base
, iv0
->base
);
1498 niter
->niter
= delta
;
1499 niter
->max
= widest_int::from (wi::from_mpz (niter_type
, bnds
->up
, false),
1500 TYPE_SIGN (niter_type
));
1501 niter
->control
.no_overflow
= true;
1505 if (integer_nonzerop (iv0
->step
))
1506 step
= fold_convert (niter_type
, iv0
->step
);
1508 step
= fold_convert (niter_type
,
1509 fold_build1 (NEGATE_EXPR
, type
, iv1
->step
));
1511 /* If we can determine the final value of the control iv exactly, we can
1512 transform the condition to != comparison. In particular, this will be
1513 the case if DELTA is constant. */
1514 if (number_of_iterations_lt_to_ne (type
, iv0
, iv1
, niter
, &delta
, step
,
1515 exit_must_be_taken
, bnds
))
1519 zps
.base
= build_int_cst (niter_type
, 0);
1521 /* number_of_iterations_lt_to_ne will add assumptions that ensure that
1522 zps does not overflow. */
1523 zps
.no_overflow
= true;
1525 return number_of_iterations_ne (loop
, type
, &zps
,
1526 delta
, niter
, true, bnds
);
1529 /* Make sure that the control iv does not overflow. */
1530 if (!assert_no_overflow_lt (type
, iv0
, iv1
, niter
, step
))
1533 /* We determine the number of iterations as (delta + step - 1) / step. For
1534 this to work, we must know that iv1->base >= iv0->base - step + 1,
1535 otherwise the loop does not roll. */
1536 assert_loop_rolls_lt (type
, iv0
, iv1
, niter
, bnds
);
1538 s
= fold_build2 (MINUS_EXPR
, niter_type
,
1539 step
, build_int_cst (niter_type
, 1));
1540 delta
= fold_build2 (PLUS_EXPR
, niter_type
, delta
, s
);
1541 niter
->niter
= fold_build2 (FLOOR_DIV_EXPR
, niter_type
, delta
, step
);
1545 wi::to_mpz (step
, mstep
, UNSIGNED
);
1546 mpz_add (tmp
, bnds
->up
, mstep
);
1547 mpz_sub_ui (tmp
, tmp
, 1);
1548 mpz_fdiv_q (tmp
, tmp
, mstep
);
1549 niter
->max
= widest_int::from (wi::from_mpz (niter_type
, tmp
, false),
1550 TYPE_SIGN (niter_type
));
1557 /* Determines number of iterations of loop whose ending condition
1558 is IV0 <= IV1. TYPE is the type of the iv. The number of
1559 iterations is stored to NITER. EXIT_MUST_BE_TAKEN is true if
1560 we know that this condition must eventually become false (we derived this
1561 earlier, and possibly set NITER->assumptions to make sure this
1562 is the case). BNDS bounds the difference IV1->base - IV0->base. */
1565 number_of_iterations_le (struct loop
*loop
, tree type
, affine_iv
*iv0
,
1566 affine_iv
*iv1
, struct tree_niter_desc
*niter
,
1567 bool exit_must_be_taken
, bounds
*bnds
)
1571 if (POINTER_TYPE_P (type
))
1574 /* Say that IV0 is the control variable. Then IV0 <= IV1 iff
1575 IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
1576 value of the type. This we must know anyway, since if it is
1577 equal to this value, the loop rolls forever. We do not check
1578 this condition for pointer type ivs, as the code cannot rely on
1579 the object to that the pointer points being placed at the end of
1580 the address space (and more pragmatically, TYPE_{MIN,MAX}_VALUE is
1581 not defined for pointers). */
1583 if (!exit_must_be_taken
&& !POINTER_TYPE_P (type
))
1585 if (integer_nonzerop (iv0
->step
))
1586 assumption
= fold_build2 (NE_EXPR
, boolean_type_node
,
1587 iv1
->base
, TYPE_MAX_VALUE (type
));
1589 assumption
= fold_build2 (NE_EXPR
, boolean_type_node
,
1590 iv0
->base
, TYPE_MIN_VALUE (type
));
1592 if (integer_zerop (assumption
))
1594 if (!integer_nonzerop (assumption
))
1595 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1596 niter
->assumptions
, assumption
);
1599 if (integer_nonzerop (iv0
->step
))
1601 if (POINTER_TYPE_P (type
))
1602 iv1
->base
= fold_build_pointer_plus_hwi (iv1
->base
, 1);
1604 iv1
->base
= fold_build2 (PLUS_EXPR
, type1
, iv1
->base
,
1605 build_int_cst (type1
, 1));
1607 else if (POINTER_TYPE_P (type
))
1608 iv0
->base
= fold_build_pointer_plus_hwi (iv0
->base
, -1);
1610 iv0
->base
= fold_build2 (MINUS_EXPR
, type1
,
1611 iv0
->base
, build_int_cst (type1
, 1));
1613 bounds_add (bnds
, 1, type1
);
1615 return number_of_iterations_lt (loop
, type
, iv0
, iv1
, niter
, exit_must_be_taken
,
1619 /* Dumps description of affine induction variable IV to FILE. */
1622 dump_affine_iv (FILE *file
, affine_iv
*iv
)
1624 if (!integer_zerop (iv
->step
))
1625 fprintf (file
, "[");
1627 print_generic_expr (dump_file
, iv
->base
, TDF_SLIM
);
1629 if (!integer_zerop (iv
->step
))
1631 fprintf (file
, ", + , ");
1632 print_generic_expr (dump_file
, iv
->step
, TDF_SLIM
);
1633 fprintf (file
, "]%s", iv
->no_overflow
? "(no_overflow)" : "");
1637 /* Determine the number of iterations according to condition (for staying
1638 inside loop) which compares two induction variables using comparison
1639 operator CODE. The induction variable on left side of the comparison
1640 is IV0, the right-hand side is IV1. Both induction variables must have
1641 type TYPE, which must be an integer or pointer type. The steps of the
1642 ivs must be constants (or NULL_TREE, which is interpreted as constant zero).
1644 LOOP is the loop whose number of iterations we are determining.
1646 ONLY_EXIT is true if we are sure this is the only way the loop could be
1647 exited (including possibly non-returning function calls, exceptions, etc.)
1648 -- in this case we can use the information whether the control induction
1649 variables can overflow or not in a more efficient way.
1651 if EVERY_ITERATION is true, we know the test is executed on every iteration.
1653 The results (number of iterations and assumptions as described in
1654 comments at struct tree_niter_desc in tree-ssa-loop.h) are stored to NITER.
1655 Returns false if it fails to determine number of iterations, true if it
1656 was determined (possibly with some assumptions). */
1659 number_of_iterations_cond (struct loop
*loop
,
1660 tree type
, affine_iv
*iv0
, enum tree_code code
,
1661 affine_iv
*iv1
, struct tree_niter_desc
*niter
,
1662 bool only_exit
, bool every_iteration
)
1664 bool exit_must_be_taken
= false, ret
;
1667 /* If the test is not executed every iteration, wrapping may make the test
1669 TODO: the overflow case can be still used as unreliable estimate of upper
1670 bound. But we have no API to pass it down to number of iterations code
1671 and, at present, it will not use it anyway. */
1672 if (!every_iteration
1673 && (!iv0
->no_overflow
|| !iv1
->no_overflow
1674 || code
== NE_EXPR
|| code
== EQ_EXPR
))
1677 /* The meaning of these assumptions is this:
1679 then the rest of information does not have to be valid
1680 if may_be_zero then the loop does not roll, even if
1682 niter
->assumptions
= boolean_true_node
;
1683 niter
->may_be_zero
= boolean_false_node
;
1684 niter
->niter
= NULL_TREE
;
1686 niter
->bound
= NULL_TREE
;
1687 niter
->cmp
= ERROR_MARK
;
1689 /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
1690 the control variable is on lhs. */
1691 if (code
== GE_EXPR
|| code
== GT_EXPR
1692 || (code
== NE_EXPR
&& integer_zerop (iv0
->step
)))
1694 std::swap (iv0
, iv1
);
1695 code
= swap_tree_comparison (code
);
1698 if (POINTER_TYPE_P (type
))
1700 /* Comparison of pointers is undefined unless both iv0 and iv1 point
1701 to the same object. If they do, the control variable cannot wrap
1702 (as wrap around the bounds of memory will never return a pointer
1703 that would be guaranteed to point to the same object, even if we
1704 avoid undefined behavior by casting to size_t and back). */
1705 iv0
->no_overflow
= true;
1706 iv1
->no_overflow
= true;
1709 /* If the control induction variable does not overflow and the only exit
1710 from the loop is the one that we analyze, we know it must be taken
1714 if (!integer_zerop (iv0
->step
) && iv0
->no_overflow
)
1715 exit_must_be_taken
= true;
1716 else if (!integer_zerop (iv1
->step
) && iv1
->no_overflow
)
1717 exit_must_be_taken
= true;
1720 /* We can handle the case when neither of the sides of the comparison is
1721 invariant, provided that the test is NE_EXPR. This rarely occurs in
1722 practice, but it is simple enough to manage. */
1723 if (!integer_zerop (iv0
->step
) && !integer_zerop (iv1
->step
))
1725 tree step_type
= POINTER_TYPE_P (type
) ? sizetype
: type
;
1726 if (code
!= NE_EXPR
)
1729 iv0
->step
= fold_binary_to_constant (MINUS_EXPR
, step_type
,
1730 iv0
->step
, iv1
->step
);
1731 iv0
->no_overflow
= false;
1732 iv1
->step
= build_int_cst (step_type
, 0);
1733 iv1
->no_overflow
= true;
1736 /* If the result of the comparison is a constant, the loop is weird. More
1737 precise handling would be possible, but the situation is not common enough
1738 to waste time on it. */
1739 if (integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
))
1742 /* Ignore loops of while (i-- < 10) type. */
1743 if (code
!= NE_EXPR
)
1745 if (iv0
->step
&& tree_int_cst_sign_bit (iv0
->step
))
1748 if (!integer_zerop (iv1
->step
) && !tree_int_cst_sign_bit (iv1
->step
))
1752 /* If the loop exits immediately, there is nothing to do. */
1753 tree tem
= fold_binary (code
, boolean_type_node
, iv0
->base
, iv1
->base
);
1754 if (tem
&& integer_zerop (tem
))
1756 niter
->niter
= build_int_cst (unsigned_type_for (type
), 0);
1761 /* OK, now we know we have a senseful loop. Handle several cases, depending
1762 on what comparison operator is used. */
1763 bound_difference (loop
, iv1
->base
, iv0
->base
, &bnds
);
1765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1768 "Analyzing # of iterations of loop %d\n", loop
->num
);
1770 fprintf (dump_file
, " exit condition ");
1771 dump_affine_iv (dump_file
, iv0
);
1772 fprintf (dump_file
, " %s ",
1773 code
== NE_EXPR
? "!="
1774 : code
== LT_EXPR
? "<"
1776 dump_affine_iv (dump_file
, iv1
);
1777 fprintf (dump_file
, "\n");
1779 fprintf (dump_file
, " bounds on difference of bases: ");
1780 mpz_out_str (dump_file
, 10, bnds
.below
);
1781 fprintf (dump_file
, " ... ");
1782 mpz_out_str (dump_file
, 10, bnds
.up
);
1783 fprintf (dump_file
, "\n");
1789 gcc_assert (integer_zerop (iv1
->step
));
1790 ret
= number_of_iterations_ne (loop
, type
, iv0
, iv1
->base
, niter
,
1791 exit_must_be_taken
, &bnds
);
1795 ret
= number_of_iterations_lt (loop
, type
, iv0
, iv1
, niter
,
1796 exit_must_be_taken
, &bnds
);
1800 ret
= number_of_iterations_le (loop
, type
, iv0
, iv1
, niter
,
1801 exit_must_be_taken
, &bnds
);
1808 mpz_clear (bnds
.up
);
1809 mpz_clear (bnds
.below
);
1811 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1815 fprintf (dump_file
, " result:\n");
1816 if (!integer_nonzerop (niter
->assumptions
))
1818 fprintf (dump_file
, " under assumptions ");
1819 print_generic_expr (dump_file
, niter
->assumptions
, TDF_SLIM
);
1820 fprintf (dump_file
, "\n");
1823 if (!integer_zerop (niter
->may_be_zero
))
1825 fprintf (dump_file
, " zero if ");
1826 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1827 fprintf (dump_file
, "\n");
1830 fprintf (dump_file
, " # of iterations ");
1831 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1832 fprintf (dump_file
, ", bounded by ");
1833 print_decu (niter
->max
, dump_file
);
1834 fprintf (dump_file
, "\n");
1837 fprintf (dump_file
, " failed\n\n");
1842 /* Substitute NEW for OLD in EXPR and fold the result. */
1845 simplify_replace_tree (tree expr
, tree old
, tree new_tree
)
1848 tree ret
= NULL_TREE
, e
, se
;
1853 /* Do not bother to replace constants. */
1854 if (CONSTANT_CLASS_P (old
))
1858 || operand_equal_p (expr
, old
, 0))
1859 return unshare_expr (new_tree
);
1864 n
= TREE_OPERAND_LENGTH (expr
);
1865 for (i
= 0; i
< n
; i
++)
1867 e
= TREE_OPERAND (expr
, i
);
1868 se
= simplify_replace_tree (e
, old
, new_tree
);
1873 ret
= copy_node (expr
);
1875 TREE_OPERAND (ret
, i
) = se
;
1878 return (ret
? fold (ret
) : expr
);
1881 /* Expand definitions of ssa names in EXPR as long as they are simple
1882 enough, and return the new expression. If STOP is specified, stop
1883 expanding if EXPR equals to it. */
1886 expand_simple_operations (tree expr
, tree stop
)
1889 tree ret
= NULL_TREE
, e
, ee
, e1
;
1890 enum tree_code code
;
1893 if (expr
== NULL_TREE
)
1896 if (is_gimple_min_invariant (expr
))
1899 code
= TREE_CODE (expr
);
1900 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1902 n
= TREE_OPERAND_LENGTH (expr
);
1903 for (i
= 0; i
< n
; i
++)
1905 e
= TREE_OPERAND (expr
, i
);
1906 ee
= expand_simple_operations (e
, stop
);
1911 ret
= copy_node (expr
);
1913 TREE_OPERAND (ret
, i
) = ee
;
1919 fold_defer_overflow_warnings ();
1921 fold_undefer_and_ignore_overflow_warnings ();
1925 /* Stop if it's not ssa name or the one we don't want to expand. */
1926 if (TREE_CODE (expr
) != SSA_NAME
|| expr
== stop
)
1929 stmt
= SSA_NAME_DEF_STMT (expr
);
1930 if (gimple_code (stmt
) == GIMPLE_PHI
)
1932 basic_block src
, dest
;
1934 if (gimple_phi_num_args (stmt
) != 1)
1936 e
= PHI_ARG_DEF (stmt
, 0);
1938 /* Avoid propagating through loop exit phi nodes, which
1939 could break loop-closed SSA form restrictions. */
1940 dest
= gimple_bb (stmt
);
1941 src
= single_pred (dest
);
1942 if (TREE_CODE (e
) == SSA_NAME
1943 && src
->loop_father
!= dest
->loop_father
)
1946 return expand_simple_operations (e
, stop
);
1948 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1951 /* Avoid expanding to expressions that contain SSA names that need
1952 to take part in abnormal coalescing. */
1954 FOR_EACH_SSA_TREE_OPERAND (e
, stmt
, iter
, SSA_OP_USE
)
1955 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (e
))
1958 e
= gimple_assign_rhs1 (stmt
);
1959 code
= gimple_assign_rhs_code (stmt
);
1960 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1962 if (is_gimple_min_invariant (e
))
1965 if (code
== SSA_NAME
)
1966 return expand_simple_operations (e
, stop
);
1974 /* Casts are simple. */
1975 ee
= expand_simple_operations (e
, stop
);
1976 return fold_build1 (code
, TREE_TYPE (expr
), ee
);
1980 if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr
))
1981 && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr
)))
1984 case POINTER_PLUS_EXPR
:
1985 /* And increments and decrements by a constant are simple. */
1986 e1
= gimple_assign_rhs2 (stmt
);
1987 if (!is_gimple_min_invariant (e1
))
1990 ee
= expand_simple_operations (e
, stop
);
1991 return fold_build2 (code
, TREE_TYPE (expr
), ee
, e1
);
1998 /* Tries to simplify EXPR using the condition COND. Returns the simplified
1999 expression (or EXPR unchanged, if no simplification was possible). */
2002 tree_simplify_using_condition_1 (tree cond
, tree expr
)
2005 tree e
, e0
, e1
, e2
, notcond
;
2006 enum tree_code code
= TREE_CODE (expr
);
2008 if (code
== INTEGER_CST
)
2011 if (code
== TRUTH_OR_EXPR
2012 || code
== TRUTH_AND_EXPR
2013 || code
== COND_EXPR
)
2017 e0
= tree_simplify_using_condition_1 (cond
, TREE_OPERAND (expr
, 0));
2018 if (TREE_OPERAND (expr
, 0) != e0
)
2021 e1
= tree_simplify_using_condition_1 (cond
, TREE_OPERAND (expr
, 1));
2022 if (TREE_OPERAND (expr
, 1) != e1
)
2025 if (code
== COND_EXPR
)
2027 e2
= tree_simplify_using_condition_1 (cond
, TREE_OPERAND (expr
, 2));
2028 if (TREE_OPERAND (expr
, 2) != e2
)
2036 if (code
== COND_EXPR
)
2037 expr
= fold_build3 (code
, boolean_type_node
, e0
, e1
, e2
);
2039 expr
= fold_build2 (code
, boolean_type_node
, e0
, e1
);
2045 /* In case COND is equality, we may be able to simplify EXPR by copy/constant
2046 propagation, and vice versa. Fold does not handle this, since it is
2047 considered too expensive. */
2048 if (TREE_CODE (cond
) == EQ_EXPR
)
2050 e0
= TREE_OPERAND (cond
, 0);
2051 e1
= TREE_OPERAND (cond
, 1);
2053 /* We know that e0 == e1. Check whether we cannot simplify expr
2055 e
= simplify_replace_tree (expr
, e0
, e1
);
2056 if (integer_zerop (e
) || integer_nonzerop (e
))
2059 e
= simplify_replace_tree (expr
, e1
, e0
);
2060 if (integer_zerop (e
) || integer_nonzerop (e
))
2063 if (TREE_CODE (expr
) == EQ_EXPR
)
2065 e0
= TREE_OPERAND (expr
, 0);
2066 e1
= TREE_OPERAND (expr
, 1);
2068 /* If e0 == e1 (EXPR) implies !COND, then EXPR cannot be true. */
2069 e
= simplify_replace_tree (cond
, e0
, e1
);
2070 if (integer_zerop (e
))
2072 e
= simplify_replace_tree (cond
, e1
, e0
);
2073 if (integer_zerop (e
))
2076 if (TREE_CODE (expr
) == NE_EXPR
)
2078 e0
= TREE_OPERAND (expr
, 0);
2079 e1
= TREE_OPERAND (expr
, 1);
2081 /* If e0 == e1 (!EXPR) implies !COND, then EXPR must be true. */
2082 e
= simplify_replace_tree (cond
, e0
, e1
);
2083 if (integer_zerop (e
))
2084 return boolean_true_node
;
2085 e
= simplify_replace_tree (cond
, e1
, e0
);
2086 if (integer_zerop (e
))
2087 return boolean_true_node
;
2090 /* Check whether COND ==> EXPR. */
2091 notcond
= invert_truthvalue (cond
);
2092 e
= fold_binary (TRUTH_OR_EXPR
, boolean_type_node
, notcond
, expr
);
2093 if (e
&& integer_nonzerop (e
))
2096 /* Check whether COND ==> not EXPR. */
2097 e
= fold_binary (TRUTH_AND_EXPR
, boolean_type_node
, cond
, expr
);
2098 if (e
&& integer_zerop (e
))
2104 /* Tries to simplify EXPR using the condition COND. Returns the simplified
2105 expression (or EXPR unchanged, if no simplification was possible).
2106 Wrapper around tree_simplify_using_condition_1 that ensures that chains
2107 of simple operations in definitions of ssa names in COND are expanded,
2108 so that things like casts or incrementing the value of the bound before
2109 the loop do not cause us to fail. */
2112 tree_simplify_using_condition (tree cond
, tree expr
)
2114 cond
= expand_simple_operations (cond
);
2116 return tree_simplify_using_condition_1 (cond
, expr
);
2119 /* Tries to simplify EXPR using the conditions on entry to LOOP.
2120 Returns the simplified expression (or EXPR unchanged, if no
2121 simplification was possible). */
2124 simplify_using_initial_conditions (struct loop
*loop
, tree expr
)
2129 tree cond
, expanded
, backup
;
2132 if (TREE_CODE (expr
) == INTEGER_CST
)
2135 backup
= expanded
= expand_simple_operations (expr
);
2137 /* Limit walking the dominators to avoid quadraticness in
2138 the number of BBs times the number of loops in degenerate
2140 for (bb
= loop
->header
;
2141 bb
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
) && cnt
< MAX_DOMINATORS_TO_WALK
;
2142 bb
= get_immediate_dominator (CDI_DOMINATORS
, bb
))
2144 if (!single_pred_p (bb
))
2146 e
= single_pred_edge (bb
);
2148 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2151 stmt
= last_stmt (e
->src
);
2152 cond
= fold_build2 (gimple_cond_code (stmt
),
2154 gimple_cond_lhs (stmt
),
2155 gimple_cond_rhs (stmt
));
2156 if (e
->flags
& EDGE_FALSE_VALUE
)
2157 cond
= invert_truthvalue (cond
);
2158 expanded
= tree_simplify_using_condition (cond
, expanded
);
2159 /* Break if EXPR is simplified to const values. */
2161 && (integer_zerop (expanded
) || integer_nonzerop (expanded
)))
2167 /* Return the original expression if no simplification is done. */
2168 return operand_equal_p (backup
, expanded
, 0) ? expr
: expanded
;
2171 /* Tries to simplify EXPR using the evolutions of the loop invariants
2172 in the superloops of LOOP. Returns the simplified expression
2173 (or EXPR unchanged, if no simplification was possible). */
2176 simplify_using_outer_evolutions (struct loop
*loop
, tree expr
)
2178 enum tree_code code
= TREE_CODE (expr
);
2182 if (is_gimple_min_invariant (expr
))
2185 if (code
== TRUTH_OR_EXPR
2186 || code
== TRUTH_AND_EXPR
2187 || code
== COND_EXPR
)
2191 e0
= simplify_using_outer_evolutions (loop
, TREE_OPERAND (expr
, 0));
2192 if (TREE_OPERAND (expr
, 0) != e0
)
2195 e1
= simplify_using_outer_evolutions (loop
, TREE_OPERAND (expr
, 1));
2196 if (TREE_OPERAND (expr
, 1) != e1
)
2199 if (code
== COND_EXPR
)
2201 e2
= simplify_using_outer_evolutions (loop
, TREE_OPERAND (expr
, 2));
2202 if (TREE_OPERAND (expr
, 2) != e2
)
2210 if (code
== COND_EXPR
)
2211 expr
= fold_build3 (code
, boolean_type_node
, e0
, e1
, e2
);
2213 expr
= fold_build2 (code
, boolean_type_node
, e0
, e1
);
2219 e
= instantiate_parameters (loop
, expr
);
2220 if (is_gimple_min_invariant (e
))
2226 /* Returns true if EXIT is the only possible exit from LOOP. */
2229 loop_only_exit_p (const struct loop
*loop
, const_edge exit
)
2232 gimple_stmt_iterator bsi
;
2235 if (exit
!= single_exit (loop
))
2238 body
= get_loop_body (loop
);
2239 for (i
= 0; i
< loop
->num_nodes
; i
++)
2241 for (bsi
= gsi_start_bb (body
[i
]); !gsi_end_p (bsi
); gsi_next (&bsi
))
2242 if (stmt_can_terminate_bb_p (gsi_stmt (bsi
)))
2253 /* Stores description of number of iterations of LOOP derived from
2254 EXIT (an exit edge of the LOOP) in NITER. Returns true if some useful
2255 information could be derived (and fields of NITER have meaning described
2256 in comments at struct tree_niter_desc declaration), false otherwise.
2257 When EVERY_ITERATION is true, only tests that are known to be executed
2258 every iteration are considered (i.e. only test that alone bounds the loop).
2259 If AT_STMT is not NULL, this function stores LOOP's condition statement in
2260 it when returning true. */
2263 number_of_iterations_exit_assumptions (struct loop
*loop
, edge exit
,
2264 struct tree_niter_desc
*niter
,
2265 gcond
**at_stmt
, bool every_iteration
)
2271 enum tree_code code
;
2275 /* Nothing to analyze if the loop is known to be infinite. */
2276 if (loop_constraint_set_p (loop
, LOOP_C_INFINITE
))
2279 safe
= dominated_by_p (CDI_DOMINATORS
, loop
->latch
, exit
->src
);
2281 if (every_iteration
&& !safe
)
2284 niter
->assumptions
= boolean_false_node
;
2285 niter
->control
.base
= NULL_TREE
;
2286 niter
->control
.step
= NULL_TREE
;
2287 niter
->control
.no_overflow
= false;
2288 last
= last_stmt (exit
->src
);
2291 stmt
= dyn_cast
<gcond
*> (last
);
2295 /* We want the condition for staying inside loop. */
2296 code
= gimple_cond_code (stmt
);
2297 if (exit
->flags
& EDGE_TRUE_VALUE
)
2298 code
= invert_tree_comparison (code
, false);
2313 op0
= gimple_cond_lhs (stmt
);
2314 op1
= gimple_cond_rhs (stmt
);
2315 type
= TREE_TYPE (op0
);
2317 if (TREE_CODE (type
) != INTEGER_TYPE
2318 && !POINTER_TYPE_P (type
))
2321 tree iv0_niters
= NULL_TREE
;
2322 if (!simple_iv_with_niters (loop
, loop_containing_stmt (stmt
),
2323 op0
, &iv0
, &iv0_niters
, false))
2325 tree iv1_niters
= NULL_TREE
;
2326 if (!simple_iv_with_niters (loop
, loop_containing_stmt (stmt
),
2327 op1
, &iv1
, &iv1_niters
, false))
2329 /* Give up on complicated case. */
2330 if (iv0_niters
&& iv1_niters
)
2333 /* We don't want to see undefined signed overflow warnings while
2334 computing the number of iterations. */
2335 fold_defer_overflow_warnings ();
2337 iv0
.base
= expand_simple_operations (iv0
.base
);
2338 iv1
.base
= expand_simple_operations (iv1
.base
);
2339 if (!number_of_iterations_cond (loop
, type
, &iv0
, code
, &iv1
, niter
,
2340 loop_only_exit_p (loop
, exit
), safe
))
2342 fold_undefer_and_ignore_overflow_warnings ();
2346 /* Incorporate additional assumption implied by control iv. */
2347 tree iv_niters
= iv0_niters
? iv0_niters
: iv1_niters
;
2350 tree assumption
= fold_build2 (LE_EXPR
, boolean_type_node
, niter
->niter
,
2351 fold_convert (TREE_TYPE (niter
->niter
),
2354 if (!integer_nonzerop (assumption
))
2355 niter
->assumptions
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2356 niter
->assumptions
, assumption
);
2358 /* Refine upper bound if possible. */
2359 if (TREE_CODE (iv_niters
) == INTEGER_CST
2360 && niter
->max
> wi::to_widest (iv_niters
))
2361 niter
->max
= wi::to_widest (iv_niters
);
2364 /* There is no assumptions if the loop is known to be finite. */
2365 if (!integer_zerop (niter
->assumptions
)
2366 && loop_constraint_set_p (loop
, LOOP_C_FINITE
))
2367 niter
->assumptions
= boolean_true_node
;
2371 niter
->assumptions
= simplify_using_outer_evolutions (loop
,
2372 niter
->assumptions
);
2373 niter
->may_be_zero
= simplify_using_outer_evolutions (loop
,
2374 niter
->may_be_zero
);
2375 niter
->niter
= simplify_using_outer_evolutions (loop
, niter
->niter
);
2379 = simplify_using_initial_conditions (loop
,
2380 niter
->assumptions
);
2382 = simplify_using_initial_conditions (loop
,
2383 niter
->may_be_zero
);
2385 fold_undefer_and_ignore_overflow_warnings ();
2387 /* If NITER has simplified into a constant, update MAX. */
2388 if (TREE_CODE (niter
->niter
) == INTEGER_CST
)
2389 niter
->max
= wi::to_widest (niter
->niter
);
2394 return (!integer_zerop (niter
->assumptions
));
2397 /* Like number_of_iterations_exit_assumptions, but return TRUE only if
2398 the niter information holds unconditionally. */
2401 number_of_iterations_exit (struct loop
*loop
, edge exit
,
2402 struct tree_niter_desc
*niter
,
2403 bool warn
, bool every_iteration
)
2406 if (!number_of_iterations_exit_assumptions (loop
, exit
, niter
,
2407 &stmt
, every_iteration
))
2410 if (integer_nonzerop (niter
->assumptions
))
2414 warning_at (gimple_location_safe (stmt
),
2415 OPT_Wunsafe_loop_optimizations
,
2416 "missed loop optimization, the loop counter may overflow");
2421 /* Try to determine the number of iterations of LOOP. If we succeed,
2422 expression giving number of iterations is returned and *EXIT is
2423 set to the edge from that the information is obtained. Otherwise
2424 chrec_dont_know is returned. */
2427 find_loop_niter (struct loop
*loop
, edge
*exit
)
2430 vec
<edge
> exits
= get_loop_exit_edges (loop
);
2432 tree niter
= NULL_TREE
, aniter
;
2433 struct tree_niter_desc desc
;
2436 FOR_EACH_VEC_ELT (exits
, i
, ex
)
2438 if (!number_of_iterations_exit (loop
, ex
, &desc
, false))
2441 if (integer_nonzerop (desc
.may_be_zero
))
2443 /* We exit in the first iteration through this exit.
2444 We won't find anything better. */
2445 niter
= build_int_cst (unsigned_type_node
, 0);
2450 if (!integer_zerop (desc
.may_be_zero
))
2453 aniter
= desc
.niter
;
2457 /* Nothing recorded yet. */
2463 /* Prefer constants, the lower the better. */
2464 if (TREE_CODE (aniter
) != INTEGER_CST
)
2467 if (TREE_CODE (niter
) != INTEGER_CST
)
2474 if (tree_int_cst_lt (aniter
, niter
))
2483 return niter
? niter
: chrec_dont_know
;
2486 /* Return true if loop is known to have bounded number of iterations. */
2489 finite_loop_p (struct loop
*loop
)
2494 flags
= flags_from_decl_or_type (current_function_decl
);
2495 if ((flags
& (ECF_CONST
|ECF_PURE
)) && !(flags
& ECF_LOOPING_CONST_OR_PURE
))
2497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2498 fprintf (dump_file
, "Found loop %i to be finite: it is within pure or const function.\n",
2503 if (loop
->any_upper_bound
2504 || max_loop_iterations (loop
, &nit
))
2506 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2507 fprintf (dump_file
, "Found loop %i to be finite: upper bound found.\n",
2516 Analysis of a number of iterations of a loop by a brute-force evaluation.
2520 /* Bound on the number of iterations we try to evaluate. */
2522 #define MAX_ITERATIONS_TO_TRACK \
2523 ((unsigned) PARAM_VALUE (PARAM_MAX_ITERATIONS_TO_TRACK))
2525 /* Returns the loop phi node of LOOP such that ssa name X is derived from its
2526 result by a chain of operations such that all but exactly one of their
2527 operands are constants. */
2530 chain_of_csts_start (struct loop
*loop
, tree x
)
2532 gimple
*stmt
= SSA_NAME_DEF_STMT (x
);
2534 basic_block bb
= gimple_bb (stmt
);
2535 enum tree_code code
;
2538 || !flow_bb_inside_loop_p (loop
, bb
))
2541 if (gimple_code (stmt
) == GIMPLE_PHI
)
2543 if (bb
== loop
->header
)
2544 return as_a
<gphi
*> (stmt
);
2549 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2550 || gimple_assign_rhs_class (stmt
) == GIMPLE_TERNARY_RHS
)
2553 code
= gimple_assign_rhs_code (stmt
);
2554 if (gimple_references_memory_p (stmt
)
2555 || TREE_CODE_CLASS (code
) == tcc_reference
2556 || (code
== ADDR_EXPR
2557 && !is_gimple_min_invariant (gimple_assign_rhs1 (stmt
))))
2560 use
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_USE
);
2561 if (use
== NULL_TREE
)
2564 return chain_of_csts_start (loop
, use
);
2567 /* Determines whether the expression X is derived from a result of a phi node
2568 in header of LOOP such that
2570 * the derivation of X consists only from operations with constants
2571 * the initial value of the phi node is constant
2572 * the value of the phi node in the next iteration can be derived from the
2573 value in the current iteration by a chain of operations with constants,
2574 or is also a constant
2576 If such phi node exists, it is returned, otherwise NULL is returned. */
2579 get_base_for (struct loop
*loop
, tree x
)
2584 if (is_gimple_min_invariant (x
))
2587 phi
= chain_of_csts_start (loop
, x
);
2591 init
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
2592 next
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
2594 if (!is_gimple_min_invariant (init
))
2597 if (TREE_CODE (next
) == SSA_NAME
2598 && chain_of_csts_start (loop
, next
) != phi
)
2604 /* Given an expression X, then
2606 * if X is NULL_TREE, we return the constant BASE.
2607 * if X is a constant, we return the constant X.
2608 * otherwise X is a SSA name, whose value in the considered loop is derived
2609 by a chain of operations with constant from a result of a phi node in
2610 the header of the loop. Then we return value of X when the value of the
2611 result of this phi node is given by the constant BASE. */
2614 get_val_for (tree x
, tree base
)
2618 gcc_checking_assert (is_gimple_min_invariant (base
));
2622 else if (is_gimple_min_invariant (x
))
2625 stmt
= SSA_NAME_DEF_STMT (x
);
2626 if (gimple_code (stmt
) == GIMPLE_PHI
)
2629 gcc_checking_assert (is_gimple_assign (stmt
));
2631 /* STMT must be either an assignment of a single SSA name or an
2632 expression involving an SSA name and a constant. Try to fold that
2633 expression using the value for the SSA name. */
2634 if (gimple_assign_ssa_name_copy_p (stmt
))
2635 return get_val_for (gimple_assign_rhs1 (stmt
), base
);
2636 else if (gimple_assign_rhs_class (stmt
) == GIMPLE_UNARY_RHS
2637 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2638 return fold_build1 (gimple_assign_rhs_code (stmt
),
2639 gimple_expr_type (stmt
),
2640 get_val_for (gimple_assign_rhs1 (stmt
), base
));
2641 else if (gimple_assign_rhs_class (stmt
) == GIMPLE_BINARY_RHS
)
2643 tree rhs1
= gimple_assign_rhs1 (stmt
);
2644 tree rhs2
= gimple_assign_rhs2 (stmt
);
2645 if (TREE_CODE (rhs1
) == SSA_NAME
)
2646 rhs1
= get_val_for (rhs1
, base
);
2647 else if (TREE_CODE (rhs2
) == SSA_NAME
)
2648 rhs2
= get_val_for (rhs2
, base
);
2651 return fold_build2 (gimple_assign_rhs_code (stmt
),
2652 gimple_expr_type (stmt
), rhs1
, rhs2
);
2659 /* Tries to count the number of iterations of LOOP till it exits by EXIT
2660 by brute force -- i.e. by determining the value of the operands of the
2661 condition at EXIT in first few iterations of the loop (assuming that
2662 these values are constant) and determining the first one in that the
2663 condition is not satisfied. Returns the constant giving the number
2664 of the iterations of LOOP if successful, chrec_dont_know otherwise. */
2667 loop_niter_by_eval (struct loop
*loop
, edge exit
)
2670 tree op
[2], val
[2], next
[2], aval
[2];
2676 cond
= last_stmt (exit
->src
);
2677 if (!cond
|| gimple_code (cond
) != GIMPLE_COND
)
2678 return chrec_dont_know
;
2680 cmp
= gimple_cond_code (cond
);
2681 if (exit
->flags
& EDGE_TRUE_VALUE
)
2682 cmp
= invert_tree_comparison (cmp
, false);
2692 op
[0] = gimple_cond_lhs (cond
);
2693 op
[1] = gimple_cond_rhs (cond
);
2697 return chrec_dont_know
;
2700 for (j
= 0; j
< 2; j
++)
2702 if (is_gimple_min_invariant (op
[j
]))
2705 next
[j
] = NULL_TREE
;
2710 phi
= get_base_for (loop
, op
[j
]);
2712 return chrec_dont_know
;
2713 val
[j
] = PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
2714 next
[j
] = PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
2718 /* Don't issue signed overflow warnings. */
2719 fold_defer_overflow_warnings ();
2721 for (i
= 0; i
< MAX_ITERATIONS_TO_TRACK
; i
++)
2723 for (j
= 0; j
< 2; j
++)
2724 aval
[j
] = get_val_for (op
[j
], val
[j
]);
2726 acnd
= fold_binary (cmp
, boolean_type_node
, aval
[0], aval
[1]);
2727 if (acnd
&& integer_zerop (acnd
))
2729 fold_undefer_and_ignore_overflow_warnings ();
2730 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2732 "Proved that loop %d iterates %d times using brute force.\n",
2734 return build_int_cst (unsigned_type_node
, i
);
2737 for (j
= 0; j
< 2; j
++)
2740 val
[j
] = get_val_for (next
[j
], val
[j
]);
2741 if (!is_gimple_min_invariant (val
[j
]))
2743 fold_undefer_and_ignore_overflow_warnings ();
2744 return chrec_dont_know
;
2748 /* If the next iteration would use the same base values
2749 as the current one, there is no point looping further,
2750 all following iterations will be the same as this one. */
2751 if (val
[0] == aval
[0] && val
[1] == aval
[1])
2755 fold_undefer_and_ignore_overflow_warnings ();
2757 return chrec_dont_know
;
2760 /* Finds the exit of the LOOP by that the loop exits after a constant
2761 number of iterations and stores the exit edge to *EXIT. The constant
2762 giving the number of iterations of LOOP is returned. The number of
2763 iterations is determined using loop_niter_by_eval (i.e. by brute force
2764 evaluation). If we are unable to find the exit for that loop_niter_by_eval
2765 determines the number of iterations, chrec_dont_know is returned. */
2768 find_loop_niter_by_eval (struct loop
*loop
, edge
*exit
)
2771 vec
<edge
> exits
= get_loop_exit_edges (loop
);
2773 tree niter
= NULL_TREE
, aniter
;
2777 /* Loops with multiple exits are expensive to handle and less important. */
2778 if (!flag_expensive_optimizations
2779 && exits
.length () > 1)
2782 return chrec_dont_know
;
2785 FOR_EACH_VEC_ELT (exits
, i
, ex
)
2787 if (!just_once_each_iteration_p (loop
, ex
->src
))
2790 aniter
= loop_niter_by_eval (loop
, ex
);
2791 if (chrec_contains_undetermined (aniter
))
2795 && !tree_int_cst_lt (aniter
, niter
))
2803 return niter
? niter
: chrec_dont_know
;
2808 Analysis of upper bounds on number of iterations of a loop.
2812 static widest_int
derive_constant_upper_bound_ops (tree
, tree
,
2813 enum tree_code
, tree
);
2815 /* Returns a constant upper bound on the value of the right-hand side of
2816 an assignment statement STMT. */
2819 derive_constant_upper_bound_assign (gimple
*stmt
)
2821 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2822 tree op0
= gimple_assign_rhs1 (stmt
);
2823 tree op1
= gimple_assign_rhs2 (stmt
);
2825 return derive_constant_upper_bound_ops (TREE_TYPE (gimple_assign_lhs (stmt
)),
2829 /* Returns a constant upper bound on the value of expression VAL. VAL
2830 is considered to be unsigned. If its type is signed, its value must
2834 derive_constant_upper_bound (tree val
)
2836 enum tree_code code
;
2839 extract_ops_from_tree (val
, &code
, &op0
, &op1
, &op2
);
2840 return derive_constant_upper_bound_ops (TREE_TYPE (val
), op0
, code
, op1
);
2843 /* Returns a constant upper bound on the value of expression OP0 CODE OP1,
2844 whose type is TYPE. The expression is considered to be unsigned. If
2845 its type is signed, its value must be nonnegative. */
2848 derive_constant_upper_bound_ops (tree type
, tree op0
,
2849 enum tree_code code
, tree op1
)
2852 widest_int bnd
, max
, cst
;
2855 if (INTEGRAL_TYPE_P (type
))
2856 maxt
= TYPE_MAX_VALUE (type
);
2858 maxt
= upper_bound_in_type (type
, type
);
2860 max
= wi::to_widest (maxt
);
2865 return wi::to_widest (op0
);
2868 subtype
= TREE_TYPE (op0
);
2869 if (!TYPE_UNSIGNED (subtype
)
2870 /* If TYPE is also signed, the fact that VAL is nonnegative implies
2871 that OP0 is nonnegative. */
2872 && TYPE_UNSIGNED (type
)
2873 && !tree_expr_nonnegative_p (op0
))
2875 /* If we cannot prove that the casted expression is nonnegative,
2876 we cannot establish more useful upper bound than the precision
2877 of the type gives us. */
2881 /* We now know that op0 is an nonnegative value. Try deriving an upper
2883 bnd
= derive_constant_upper_bound (op0
);
2885 /* If the bound does not fit in TYPE, max. value of TYPE could be
2887 if (wi::ltu_p (max
, bnd
))
2893 case POINTER_PLUS_EXPR
:
2895 if (TREE_CODE (op1
) != INTEGER_CST
2896 || !tree_expr_nonnegative_p (op0
))
2899 /* Canonicalize to OP0 - CST. Consider CST to be signed, in order to
2900 choose the most logical way how to treat this constant regardless
2901 of the signedness of the type. */
2902 cst
= wi::sext (wi::to_widest (op1
), TYPE_PRECISION (type
));
2903 if (code
!= MINUS_EXPR
)
2906 bnd
= derive_constant_upper_bound (op0
);
2908 if (wi::neg_p (cst
))
2911 /* Avoid CST == 0x80000... */
2912 if (wi::neg_p (cst
))
2915 /* OP0 + CST. We need to check that
2916 BND <= MAX (type) - CST. */
2918 widest_int mmax
= max
- cst
;
2919 if (wi::leu_p (bnd
, mmax
))
2926 /* OP0 - CST, where CST >= 0.
2928 If TYPE is signed, we have already verified that OP0 >= 0, and we
2929 know that the result is nonnegative. This implies that
2932 If TYPE is unsigned, we must additionally know that OP0 >= CST,
2933 otherwise the operation underflows.
2936 /* This should only happen if the type is unsigned; however, for
2937 buggy programs that use overflowing signed arithmetics even with
2938 -fno-wrapv, this condition may also be true for signed values. */
2939 if (wi::ltu_p (bnd
, cst
))
2942 if (TYPE_UNSIGNED (type
))
2944 tree tem
= fold_binary (GE_EXPR
, boolean_type_node
, op0
,
2945 wide_int_to_tree (type
, cst
));
2946 if (!tem
|| integer_nonzerop (tem
))
2955 case FLOOR_DIV_EXPR
:
2956 case EXACT_DIV_EXPR
:
2957 if (TREE_CODE (op1
) != INTEGER_CST
2958 || tree_int_cst_sign_bit (op1
))
2961 bnd
= derive_constant_upper_bound (op0
);
2962 return wi::udiv_floor (bnd
, wi::to_widest (op1
));
2965 if (TREE_CODE (op1
) != INTEGER_CST
2966 || tree_int_cst_sign_bit (op1
))
2968 return wi::to_widest (op1
);
2971 stmt
= SSA_NAME_DEF_STMT (op0
);
2972 if (gimple_code (stmt
) != GIMPLE_ASSIGN
2973 || gimple_assign_lhs (stmt
) != op0
)
2975 return derive_constant_upper_bound_assign (stmt
);
2982 /* Emit a -Waggressive-loop-optimizations warning if needed. */
2985 do_warn_aggressive_loop_optimizations (struct loop
*loop
,
2986 widest_int i_bound
, gimple
*stmt
)
2988 /* Don't warn if the loop doesn't have known constant bound. */
2989 if (!loop
->nb_iterations
2990 || TREE_CODE (loop
->nb_iterations
) != INTEGER_CST
2991 || !warn_aggressive_loop_optimizations
2992 /* To avoid warning multiple times for the same loop,
2993 only start warning when we preserve loops. */
2994 || (cfun
->curr_properties
& PROP_loops
) == 0
2995 /* Only warn once per loop. */
2996 || loop
->warned_aggressive_loop_optimizations
2997 /* Only warn if undefined behavior gives us lower estimate than the
2998 known constant bound. */
2999 || wi::cmpu (i_bound
, wi::to_widest (loop
->nb_iterations
)) >= 0
3000 /* And undefined behavior happens unconditionally. */
3001 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, gimple_bb (stmt
)))
3004 edge e
= single_exit (loop
);
3008 gimple
*estmt
= last_stmt (e
->src
);
3009 char buf
[WIDE_INT_PRINT_BUFFER_SIZE
];
3010 print_dec (i_bound
, buf
, TYPE_UNSIGNED (TREE_TYPE (loop
->nb_iterations
))
3011 ? UNSIGNED
: SIGNED
);
3012 if (warning_at (gimple_location (stmt
), OPT_Waggressive_loop_optimizations
,
3013 "iteration %s invokes undefined behavior", buf
))
3014 inform (gimple_location (estmt
), "within this loop");
3015 loop
->warned_aggressive_loop_optimizations
= true;
3018 /* Records that AT_STMT is executed at most BOUND + 1 times in LOOP. IS_EXIT
3019 is true if the loop is exited immediately after STMT, and this exit
3020 is taken at last when the STMT is executed BOUND + 1 times.
3021 REALISTIC is true if BOUND is expected to be close to the real number
3022 of iterations. UPPER is true if we are sure the loop iterates at most
3023 BOUND times. I_BOUND is a widest_int upper estimate on BOUND. */
3026 record_estimate (struct loop
*loop
, tree bound
, const widest_int
&i_bound
,
3027 gimple
*at_stmt
, bool is_exit
, bool realistic
, bool upper
)
3031 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3033 fprintf (dump_file
, "Statement %s", is_exit
? "(exit)" : "");
3034 print_gimple_stmt (dump_file
, at_stmt
, 0, TDF_SLIM
);
3035 fprintf (dump_file
, " is %sexecuted at most ",
3036 upper
? "" : "probably ");
3037 print_generic_expr (dump_file
, bound
, TDF_SLIM
);
3038 fprintf (dump_file
, " (bounded by ");
3039 print_decu (i_bound
, dump_file
);
3040 fprintf (dump_file
, ") + 1 times in loop %d.\n", loop
->num
);
3043 /* If the I_BOUND is just an estimate of BOUND, it rarely is close to the
3044 real number of iterations. */
3045 if (TREE_CODE (bound
) != INTEGER_CST
)
3048 gcc_checking_assert (i_bound
== wi::to_widest (bound
));
3050 /* If we have a guaranteed upper bound, record it in the appropriate
3051 list, unless this is an !is_exit bound (i.e. undefined behavior in
3052 at_stmt) in a loop with known constant number of iterations. */
3055 || loop
->nb_iterations
== NULL_TREE
3056 || TREE_CODE (loop
->nb_iterations
) != INTEGER_CST
))
3058 struct nb_iter_bound
*elt
= ggc_alloc
<nb_iter_bound
> ();
3060 elt
->bound
= i_bound
;
3061 elt
->stmt
= at_stmt
;
3062 elt
->is_exit
= is_exit
;
3063 elt
->next
= loop
->bounds
;
3067 /* If statement is executed on every path to the loop latch, we can directly
3068 infer the upper bound on the # of iterations of the loop. */
3069 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, gimple_bb (at_stmt
)))
3072 /* Update the number of iteration estimates according to the bound.
3073 If at_stmt is an exit then the loop latch is executed at most BOUND times,
3074 otherwise it can be executed BOUND + 1 times. We will lower the estimate
3075 later if such statement must be executed on last iteration */
3080 widest_int new_i_bound
= i_bound
+ delta
;
3082 /* If an overflow occurred, ignore the result. */
3083 if (wi::ltu_p (new_i_bound
, delta
))
3086 if (upper
&& !is_exit
)
3087 do_warn_aggressive_loop_optimizations (loop
, new_i_bound
, at_stmt
);
3088 record_niter_bound (loop
, new_i_bound
, realistic
, upper
);
3091 /* Records the control iv analyzed in NITER for LOOP if the iv is valid
3092 and doesn't overflow. */
3095 record_control_iv (struct loop
*loop
, struct tree_niter_desc
*niter
)
3097 struct control_iv
*iv
;
3099 if (!niter
->control
.base
|| !niter
->control
.step
)
3102 if (!integer_onep (niter
->assumptions
) || !niter
->control
.no_overflow
)
3105 iv
= ggc_alloc
<control_iv
> ();
3106 iv
->base
= niter
->control
.base
;
3107 iv
->step
= niter
->control
.step
;
3108 iv
->next
= loop
->control_ivs
;
3109 loop
->control_ivs
= iv
;
3114 /* This function returns TRUE if below conditions are satisfied:
3115 1) VAR is SSA variable.
3116 2) VAR is an IV:{base, step} in its defining loop.
3117 3) IV doesn't overflow.
3118 4) Both base and step are integer constants.
3119 5) Base is the MIN/MAX value depends on IS_MIN.
3120 Store value of base to INIT correspondingly. */
3123 get_cst_init_from_scev (tree var
, wide_int
*init
, bool is_min
)
3125 if (TREE_CODE (var
) != SSA_NAME
)
3128 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
3129 struct loop
*loop
= loop_containing_stmt (def_stmt
);
3135 if (!simple_iv (loop
, loop
, var
, &iv
, false))
3138 if (!iv
.no_overflow
)
3141 if (TREE_CODE (iv
.base
) != INTEGER_CST
|| TREE_CODE (iv
.step
) != INTEGER_CST
)
3144 if (is_min
== tree_int_cst_sign_bit (iv
.step
))
3151 /* Record the estimate on number of iterations of LOOP based on the fact that
3152 the induction variable BASE + STEP * i evaluated in STMT does not wrap and
3153 its values belong to the range <LOW, HIGH>. REALISTIC is true if the
3154 estimated number of iterations is expected to be close to the real one.
3155 UPPER is true if we are sure the induction variable does not wrap. */
3158 record_nonwrapping_iv (struct loop
*loop
, tree base
, tree step
, gimple
*stmt
,
3159 tree low
, tree high
, bool realistic
, bool upper
)
3161 tree niter_bound
, extreme
, delta
;
3162 tree type
= TREE_TYPE (base
), unsigned_type
;
3163 tree orig_base
= base
;
3165 if (TREE_CODE (step
) != INTEGER_CST
|| integer_zerop (step
))
3168 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3170 fprintf (dump_file
, "Induction variable (");
3171 print_generic_expr (dump_file
, TREE_TYPE (base
), TDF_SLIM
);
3172 fprintf (dump_file
, ") ");
3173 print_generic_expr (dump_file
, base
, TDF_SLIM
);
3174 fprintf (dump_file
, " + ");
3175 print_generic_expr (dump_file
, step
, TDF_SLIM
);
3176 fprintf (dump_file
, " * iteration does not wrap in statement ");
3177 print_gimple_stmt (dump_file
, stmt
, 0, TDF_SLIM
);
3178 fprintf (dump_file
, " in loop %d.\n", loop
->num
);
3181 unsigned_type
= unsigned_type_for (type
);
3182 base
= fold_convert (unsigned_type
, base
);
3183 step
= fold_convert (unsigned_type
, step
);
3185 if (tree_int_cst_sign_bit (step
))
3188 extreme
= fold_convert (unsigned_type
, low
);
3189 if (TREE_CODE (orig_base
) == SSA_NAME
3190 && TREE_CODE (high
) == INTEGER_CST
3191 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base
))
3192 && (get_range_info (orig_base
, &min
, &max
) == VR_RANGE
3193 || get_cst_init_from_scev (orig_base
, &max
, false))
3194 && wi::gts_p (high
, max
))
3195 base
= wide_int_to_tree (unsigned_type
, max
);
3196 else if (TREE_CODE (base
) != INTEGER_CST
3197 && dominated_by_p (CDI_DOMINATORS
,
3198 loop
->latch
, gimple_bb (stmt
)))
3199 base
= fold_convert (unsigned_type
, high
);
3200 delta
= fold_build2 (MINUS_EXPR
, unsigned_type
, base
, extreme
);
3201 step
= fold_build1 (NEGATE_EXPR
, unsigned_type
, step
);
3206 extreme
= fold_convert (unsigned_type
, high
);
3207 if (TREE_CODE (orig_base
) == SSA_NAME
3208 && TREE_CODE (low
) == INTEGER_CST
3209 && INTEGRAL_TYPE_P (TREE_TYPE (orig_base
))
3210 && (get_range_info (orig_base
, &min
, &max
) == VR_RANGE
3211 || get_cst_init_from_scev (orig_base
, &min
, true))
3212 && wi::gts_p (min
, low
))
3213 base
= wide_int_to_tree (unsigned_type
, min
);
3214 else if (TREE_CODE (base
) != INTEGER_CST
3215 && dominated_by_p (CDI_DOMINATORS
,
3216 loop
->latch
, gimple_bb (stmt
)))
3217 base
= fold_convert (unsigned_type
, low
);
3218 delta
= fold_build2 (MINUS_EXPR
, unsigned_type
, extreme
, base
);
3221 /* STMT is executed at most NITER_BOUND + 1 times, since otherwise the value
3222 would get out of the range. */
3223 niter_bound
= fold_build2 (FLOOR_DIV_EXPR
, unsigned_type
, delta
, step
);
3224 widest_int max
= derive_constant_upper_bound (niter_bound
);
3225 record_estimate (loop
, niter_bound
, max
, stmt
, false, realistic
, upper
);
3228 /* Determine information about number of iterations a LOOP from the index
3229 IDX of a data reference accessed in STMT. RELIABLE is true if STMT is
3230 guaranteed to be executed in every iteration of LOOP. Callback for
3240 idx_infer_loop_bounds (tree base
, tree
*idx
, void *dta
)
3242 struct ilb_data
*data
= (struct ilb_data
*) dta
;
3243 tree ev
, init
, step
;
3244 tree low
, high
, type
, next
;
3245 bool sign
, upper
= true, at_end
= false;
3246 struct loop
*loop
= data
->loop
;
3248 if (TREE_CODE (base
) != ARRAY_REF
)
3251 /* For arrays at the end of the structure, we are not guaranteed that they
3252 do not really extend over their declared size. However, for arrays of
3253 size greater than one, this is unlikely to be intended. */
3254 if (array_at_struct_end_p (base
))
3260 struct loop
*dloop
= loop_containing_stmt (data
->stmt
);
3264 ev
= analyze_scalar_evolution (dloop
, *idx
);
3265 ev
= instantiate_parameters (loop
, ev
);
3266 init
= initial_condition (ev
);
3267 step
= evolution_part_in_loop_num (ev
, loop
->num
);
3271 || TREE_CODE (step
) != INTEGER_CST
3272 || integer_zerop (step
)
3273 || tree_contains_chrecs (init
, NULL
)
3274 || chrec_contains_symbols_defined_in_loop (init
, loop
->num
))
3277 low
= array_ref_low_bound (base
);
3278 high
= array_ref_up_bound (base
);
3280 /* The case of nonconstant bounds could be handled, but it would be
3282 if (TREE_CODE (low
) != INTEGER_CST
3284 || TREE_CODE (high
) != INTEGER_CST
)
3286 sign
= tree_int_cst_sign_bit (step
);
3287 type
= TREE_TYPE (step
);
3289 /* The array of length 1 at the end of a structure most likely extends
3290 beyond its bounds. */
3292 && operand_equal_p (low
, high
, 0))
3295 /* In case the relevant bound of the array does not fit in type, or
3296 it does, but bound + step (in type) still belongs into the range of the
3297 array, the index may wrap and still stay within the range of the array
3298 (consider e.g. if the array is indexed by the full range of
3301 To make things simpler, we require both bounds to fit into type, although
3302 there are cases where this would not be strictly necessary. */
3303 if (!int_fits_type_p (high
, type
)
3304 || !int_fits_type_p (low
, type
))
3306 low
= fold_convert (type
, low
);
3307 high
= fold_convert (type
, high
);
3310 next
= fold_binary (PLUS_EXPR
, type
, low
, step
);
3312 next
= fold_binary (PLUS_EXPR
, type
, high
, step
);
3314 if (tree_int_cst_compare (low
, next
) <= 0
3315 && tree_int_cst_compare (next
, high
) <= 0)
3318 /* If access is not executed on every iteration, we must ensure that overlow
3319 may not make the access valid later. */
3320 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, gimple_bb (data
->stmt
))
3321 && scev_probably_wraps_p (NULL_TREE
,
3322 initial_condition_in_loop_num (ev
, loop
->num
),
3323 step
, data
->stmt
, loop
, true))
3326 record_nonwrapping_iv (loop
, init
, step
, data
->stmt
, low
, high
, false, upper
);
3330 /* Determine information about number of iterations a LOOP from the bounds
3331 of arrays in the data reference REF accessed in STMT. RELIABLE is true if
3332 STMT is guaranteed to be executed in every iteration of LOOP.*/
3335 infer_loop_bounds_from_ref (struct loop
*loop
, gimple
*stmt
, tree ref
)
3337 struct ilb_data data
;
3341 for_each_index (&ref
, idx_infer_loop_bounds
, &data
);
3344 /* Determine information about number of iterations of a LOOP from the way
3345 arrays are used in STMT. RELIABLE is true if STMT is guaranteed to be
3346 executed in every iteration of LOOP. */
3349 infer_loop_bounds_from_array (struct loop
*loop
, gimple
*stmt
)
3351 if (is_gimple_assign (stmt
))
3353 tree op0
= gimple_assign_lhs (stmt
);
3354 tree op1
= gimple_assign_rhs1 (stmt
);
3356 /* For each memory access, analyze its access function
3357 and record a bound on the loop iteration domain. */
3358 if (REFERENCE_CLASS_P (op0
))
3359 infer_loop_bounds_from_ref (loop
, stmt
, op0
);
3361 if (REFERENCE_CLASS_P (op1
))
3362 infer_loop_bounds_from_ref (loop
, stmt
, op1
);
3364 else if (is_gimple_call (stmt
))
3367 unsigned i
, n
= gimple_call_num_args (stmt
);
3369 lhs
= gimple_call_lhs (stmt
);
3370 if (lhs
&& REFERENCE_CLASS_P (lhs
))
3371 infer_loop_bounds_from_ref (loop
, stmt
, lhs
);
3373 for (i
= 0; i
< n
; i
++)
3375 arg
= gimple_call_arg (stmt
, i
);
3376 if (REFERENCE_CLASS_P (arg
))
3377 infer_loop_bounds_from_ref (loop
, stmt
, arg
);
3382 /* Determine information about number of iterations of a LOOP from the fact
3383 that pointer arithmetics in STMT does not overflow. */
3386 infer_loop_bounds_from_pointer_arith (struct loop
*loop
, gimple
*stmt
)
3388 tree def
, base
, step
, scev
, type
, low
, high
;
3391 if (!is_gimple_assign (stmt
)
3392 || gimple_assign_rhs_code (stmt
) != POINTER_PLUS_EXPR
)
3395 def
= gimple_assign_lhs (stmt
);
3396 if (TREE_CODE (def
) != SSA_NAME
)
3399 type
= TREE_TYPE (def
);
3400 if (!nowrap_type_p (type
))
3403 ptr
= gimple_assign_rhs1 (stmt
);
3404 if (!expr_invariant_in_loop_p (loop
, ptr
))
3407 var
= gimple_assign_rhs2 (stmt
);
3408 if (TYPE_PRECISION (type
) != TYPE_PRECISION (TREE_TYPE (var
)))
3411 scev
= instantiate_parameters (loop
, analyze_scalar_evolution (loop
, def
));
3412 if (chrec_contains_undetermined (scev
))
3415 base
= initial_condition_in_loop_num (scev
, loop
->num
);
3416 step
= evolution_part_in_loop_num (scev
, loop
->num
);
3419 || TREE_CODE (step
) != INTEGER_CST
3420 || tree_contains_chrecs (base
, NULL
)
3421 || chrec_contains_symbols_defined_in_loop (base
, loop
->num
))
3424 low
= lower_bound_in_type (type
, type
);
3425 high
= upper_bound_in_type (type
, type
);
3427 /* In C, pointer arithmetic p + 1 cannot use a NULL pointer, and p - 1 cannot
3428 produce a NULL pointer. The contrary would mean NULL points to an object,
3429 while NULL is supposed to compare unequal with the address of all objects.
3430 Furthermore, p + 1 cannot produce a NULL pointer and p - 1 cannot use a
3431 NULL pointer since that would mean wrapping, which we assume here not to
3432 happen. So, we can exclude NULL from the valid range of pointer
3434 if (flag_delete_null_pointer_checks
&& int_cst_value (low
) == 0)
3435 low
= build_int_cstu (TREE_TYPE (low
), TYPE_ALIGN_UNIT (TREE_TYPE (type
)));
3437 record_nonwrapping_iv (loop
, base
, step
, stmt
, low
, high
, false, true);
3440 /* Determine information about number of iterations of a LOOP from the fact
3441 that signed arithmetics in STMT does not overflow. */
3444 infer_loop_bounds_from_signedness (struct loop
*loop
, gimple
*stmt
)
3446 tree def
, base
, step
, scev
, type
, low
, high
;
3448 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
3451 def
= gimple_assign_lhs (stmt
);
3453 if (TREE_CODE (def
) != SSA_NAME
)
3456 type
= TREE_TYPE (def
);
3457 if (!INTEGRAL_TYPE_P (type
)
3458 || !TYPE_OVERFLOW_UNDEFINED (type
))
3461 scev
= instantiate_parameters (loop
, analyze_scalar_evolution (loop
, def
));
3462 if (chrec_contains_undetermined (scev
))
3465 base
= initial_condition_in_loop_num (scev
, loop
->num
);
3466 step
= evolution_part_in_loop_num (scev
, loop
->num
);
3469 || TREE_CODE (step
) != INTEGER_CST
3470 || tree_contains_chrecs (base
, NULL
)
3471 || chrec_contains_symbols_defined_in_loop (base
, loop
->num
))
3474 low
= lower_bound_in_type (type
, type
);
3475 high
= upper_bound_in_type (type
, type
);
3477 record_nonwrapping_iv (loop
, base
, step
, stmt
, low
, high
, false, true);
3480 /* The following analyzers are extracting informations on the bounds
3481 of LOOP from the following undefined behaviors:
3483 - data references should not access elements over the statically
3486 - signed variables should not overflow when flag_wrapv is not set.
3490 infer_loop_bounds_from_undefined (struct loop
*loop
)
3494 gimple_stmt_iterator bsi
;
3498 bbs
= get_loop_body (loop
);
3500 for (i
= 0; i
< loop
->num_nodes
; i
++)
3504 /* If BB is not executed in each iteration of the loop, we cannot
3505 use the operations in it to infer reliable upper bound on the
3506 # of iterations of the loop. However, we can use it as a guess.
3507 Reliable guesses come only from array bounds. */
3508 reliable
= dominated_by_p (CDI_DOMINATORS
, loop
->latch
, bb
);
3510 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
3512 gimple
*stmt
= gsi_stmt (bsi
);
3514 infer_loop_bounds_from_array (loop
, stmt
);
3518 infer_loop_bounds_from_signedness (loop
, stmt
);
3519 infer_loop_bounds_from_pointer_arith (loop
, stmt
);
3528 /* Compare wide ints, callback for qsort. */
3531 wide_int_cmp (const void *p1
, const void *p2
)
3533 const widest_int
*d1
= (const widest_int
*) p1
;
3534 const widest_int
*d2
= (const widest_int
*) p2
;
3535 return wi::cmpu (*d1
, *d2
);
3538 /* Return index of BOUND in BOUNDS array sorted in increasing order.
3539 Lookup by binary search. */
3542 bound_index (vec
<widest_int
> bounds
, const widest_int
&bound
)
3544 unsigned int end
= bounds
.length ();
3545 unsigned int begin
= 0;
3547 /* Find a matching index by means of a binary search. */
3548 while (begin
!= end
)
3550 unsigned int middle
= (begin
+ end
) / 2;
3551 widest_int index
= bounds
[middle
];
3555 else if (wi::ltu_p (index
, bound
))
/* NOTE(review): this chunk looks like a lossy extraction -- statements are
   split across lines and several original lines (braces, conditions) appear
   to be missing.  Code tokens below are preserved verbatim; verify against
   upstream tree-ssa-loop-niter.c before editing.  */
3563 /* We recorded loop bounds only for statements dominating loop latch (and thus
3564 executed each loop iteration). If there are any bounds on statements not
3565 dominating the loop latch we can improve the estimate by walking the loop
3566 body and seeing if every path from loop header to loop latch contains
3567 some bounded statement. */
3570 discover_iteration_bound_by_body_walk (struct loop
*loop
)
3572 struct nb_iter_bound
*elt
;
3573 auto_vec
<widest_int
> bounds
;
3574 vec
<vec
<basic_block
> > queues
= vNULL
;
3575 vec
<basic_block
> queue
= vNULL
;
3576 ptrdiff_t queue_index
;
3577 ptrdiff_t latch_index
= 0;
3579 /* Discover what bounds may interest us. */
3580 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
3582 widest_int bound
= elt
->bound
;
3584 /* Exit terminates loop at given iteration, while non-exits produce undefined
3585 effect on the next iteration. */
3589 /* If an overflow occurred, ignore the result. */
/* Only bounds tighter than the already-known upper bound are worth
   recording for the walk below.  */
3594 if (!loop
->any_upper_bound
3595 || wi::ltu_p (bound
, loop
->nb_iterations_upper_bound
))
3596 bounds
.safe_push (bound
);
3599 /* Exit early if there is nothing to do. */
3600 if (!bounds
.exists ())
3603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3604 fprintf (dump_file
, " Trying to walk loop body to reduce the bound.\n");
3606 /* Sort the bounds in decreasing order. */
3607 bounds
.qsort (wide_int_cmp
);
3609 /* For every basic block record the lowest bound that is guaranteed to
3610 terminate the loop. */
3612 hash_map
<basic_block
, ptrdiff_t> bb_bounds
;
3613 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
3615 widest_int bound
= elt
->bound
;
3619 /* If an overflow occurred, ignore the result. */
3624 if (!loop
->any_upper_bound
3625 || wi::ltu_p (bound
, loop
->nb_iterations_upper_bound
))
3627 ptrdiff_t index
= bound_index (bounds
, bound
);
3628 ptrdiff_t *entry
= bb_bounds
.get (gimple_bb (elt
->stmt
));
3630 bb_bounds
.put (gimple_bb (elt
->stmt
), index
);
3631 else if ((ptrdiff_t)*entry
> index
)
3636 hash_map
<basic_block
, ptrdiff_t> block_priority
;
3638 /* Perform shortest path discovery loop->header ... loop->latch.
3640 The "distance" is given by the smallest loop bound of basic block
3641 present in the path and we look for path with largest smallest bound
3644 To avoid the need for fibonacci heap on double ints we simply compress
3645 double ints into indexes to BOUNDS array and then represent the queue
3646 as arrays of queues for every index.
3647 Index of BOUNDS.length() means that the execution of given BB has
3648 no bounds determined.
3650 VISITED is a pointer map translating basic block into smallest index
3651 it was inserted into the priority queue with. */
3654 /* Start walk in loop header with index set to infinite bound. */
3655 queue_index
= bounds
.length ();
3656 queues
.safe_grow_cleared (queue_index
+ 1);
3657 queue
.safe_push (loop
->header
);
3658 queues
[queue_index
] = queue
;
3659 block_priority
.put (loop
->header
, queue_index
);
3661 for (; queue_index
>= 0; queue_index
--)
3663 if (latch_index
< queue_index
)
3665 while (queues
[queue_index
].length ())
3668 ptrdiff_t bound_index
= queue_index
;
3672 queue
= queues
[queue_index
];
3675 /* OK, we later inserted the BB with lower priority, skip it. */
3676 if (*block_priority
.get (bb
) > queue_index
)
3679 /* See if we can improve the bound. */
3680 ptrdiff_t *entry
= bb_bounds
.get (bb
);
3681 if (entry
&& *entry
< bound_index
)
3682 bound_index
= *entry
;
3684 /* Insert successors into the queue, watch for latch edge
3685 and record greatest index we saw. */
3686 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3688 bool insert
= false;
3690 if (loop_exit_edge_p (loop
, e
))
3693 if (e
== loop_latch_edge (loop
)
3694 && latch_index
< bound_index
)
3695 latch_index
= bound_index
;
3696 else if (!(entry
= block_priority
.get (e
->dest
)))
3699 block_priority
.put (e
->dest
, bound_index
);
3701 else if (*entry
< bound_index
)
3704 *entry
= bound_index
;
3708 queues
[bound_index
].safe_push (e
->dest
);
3712 queues
[queue_index
].release ();
/* LATCH_INDEX now holds the best (largest-smallest) bound index found on
   any header-to-latch path; improve the recorded bound when it beats the
   infinite-index sentinel.  */
3715 gcc_assert (latch_index
>= 0);
3716 if ((unsigned)latch_index
< bounds
.length ())
3718 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3720 fprintf (dump_file
, "Found better loop bound ");
3721 print_decu (bounds
[latch_index
], dump_file
);
3722 fprintf (dump_file
, "\n");
3724 record_niter_bound (loop
, bounds
[latch_index
], false, true);
/* NOTE(review): lossy extraction -- statement fragments split across lines
   and some original lines missing.  Tokens preserved verbatim; verify
   against upstream tree-ssa-loop-niter.c before editing.  */
3730 /* See if every path cross the loop goes through a statement that is known
3731 to not execute at the last iteration. In that case we can decrease iteration
3735 maybe_lower_iteration_bound (struct loop
*loop
)
3737 hash_set
<gimple
*> *not_executed_last_iteration
= NULL
;
3738 struct nb_iter_bound
*elt
;
3739 bool found_exit
= false;
3740 auto_vec
<basic_block
> queue
;
3743 /* Collect all statements with interesting (i.e. lower than
3744 nb_iterations_upper_bound) bound on them.
3746 TODO: Due to the way record_estimate chooses estimates to store, the bounds
3747 will be always nb_iterations_upper_bound-1. We can change this to record
3748 also statements not dominating the loop latch and update the walk below
3749 to the shortest path algorithm. */
3750 for (elt
= loop
->bounds
; elt
; elt
= elt
->next
)
3753 && wi::ltu_p (elt
->bound
, loop
->nb_iterations_upper_bound
))
/* Lazily allocate the set only when a qualifying statement exists.  */
3755 if (!not_executed_last_iteration
)
3756 not_executed_last_iteration
= new hash_set
<gimple
*>;
3757 not_executed_last_iteration
->add (elt
->stmt
);
3760 if (!not_executed_last_iteration
)
3763 /* Start DFS walk in the loop header and see if we can reach the
3764 loop latch or any of the exits (including statements with side
3765 effects that may terminate the loop otherwise) without visiting
3766 any of the statements known to have undefined effect on the last
3768 queue
.safe_push (loop
->header
);
3769 visited
= BITMAP_ALLOC (NULL
);
3770 bitmap_set_bit (visited
, loop
->header
->index
);
3775 basic_block bb
= queue
.pop ();
3776 gimple_stmt_iterator gsi
;
3777 bool stmt_found
= false;
3779 /* Loop for possible exits and statements bounding the execution. */
3780 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3782 gimple
*stmt
= gsi_stmt (gsi
);
3783 if (not_executed_last_iteration
->contains (stmt
))
3788 if (gimple_has_side_effects (stmt
))
3797 /* If no bounding statement is found, continue the walk. */
3803 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3805 if (loop_exit_edge_p (loop
, e
)
3806 || e
== loop_latch_edge (loop
))
3811 if (bitmap_set_bit (visited
, e
->dest
->index
))
3812 queue
.safe_push (e
->dest
);
3816 while (queue
.length () && !found_exit
)
3818 /* If every path through the loop reach bounding statement before exit,
3819 then we know the last iteration of the loop will have undefined effect
3820 and we can decrease number of iterations. */
3824 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3825 fprintf (dump_file
, "Reducing loop iteration estimate by 1; "
3826 "undefined statement must be executed at the last iteration.\n");
3827 record_niter_bound (loop
, loop
->nb_iterations_upper_bound
- 1,
3831 BITMAP_FREE (visited
);
3832 delete not_executed_last_iteration
;
/* NOTE(review): lossy extraction -- several declarations referenced below
   (exits, likely_exit, i, ex, niter, type, bound) have had their original
   lines dropped.  Tokens preserved verbatim; verify against upstream
   tree-ssa-loop-niter.c before editing.  */
3835 /* Records estimates on numbers of iterations of LOOP. If USE_UNDEFINED_P
3836 is true also use estimates derived from undefined behavior. */
3839 estimate_numbers_of_iterations_loop (struct loop
*loop
)
3844 struct tree_niter_desc niter_desc
;
3849 /* Give up if we already have tried to compute an estimation. */
3850 if (loop
->estimate_state
!= EST_NOT_COMPUTED
)
3853 loop
->estimate_state
= EST_AVAILABLE
;
3855 /* If we have a measured profile, use it to estimate the number of
3856 iterations. Normally this is recorded by branch_prob right after
3857 reading the profile. In case we however found a new loop, record the
3860 Explicitly check for profile status so we do not report
3861 wrong prediction hitrates for guessed loop iterations heuristics.
3862 Do not recompute already recorded bounds - we ought to be better on
3863 updating iteration bounds than updating profile in general and thus
3864 recomputing iteration bounds later in the compilation process will just
3865 introduce random roundoff errors. */
3866 if (!loop
->any_estimate
3867 && loop
->header
->count
!= 0
3868 && profile_status_for_fn (cfun
) >= PROFILE_READ
)
3870 gcov_type nit
= expected_loop_iterations_unbounded (loop
);
3871 bound
= gcov_type_to_wide_int (nit
);
3872 record_niter_bound (loop
, bound
, true, false);
3875 /* Ensure that loop->nb_iterations is computed if possible. If it turns out
3876 to be constant, we avoid undefined behavior implied bounds and instead
3877 diagnose those loops with -Waggressive-loop-optimizations. */
3878 number_of_latch_executions (loop
);
3880 exits
= get_loop_exit_edges (loop
);
3881 likely_exit
= single_likely_exit (loop
);
3882 FOR_EACH_VEC_ELT (exits
, i
, ex
)
3884 if (!number_of_iterations_exit (loop
, ex
, &niter_desc
, false, false))
3887 niter
= niter_desc
.niter
;
3888 type
= TREE_TYPE (niter
);
/* Guard the niter expression with may_be_zero when that condition is not
   known at compile time.  */
3889 if (TREE_CODE (niter_desc
.may_be_zero
) != INTEGER_CST
)
3890 niter
= build3 (COND_EXPR
, type
, niter_desc
.may_be_zero
,
3891 build_int_cst (type
, 0),
3893 record_estimate (loop
, niter
, niter_desc
.max
,
3894 last_stmt (ex
->src
),
3895 true, ex
== likely_exit
, true);
3896 record_control_iv (loop
, &niter_desc
);
3900 if (flag_aggressive_loop_optimizations
)
3901 infer_loop_bounds_from_undefined (loop
);
3903 discover_iteration_bound_by_body_walk (loop
);
3905 maybe_lower_iteration_bound (loop
);
3907 /* If we know the exact number of iterations of this loop, try to
3908 not break code with undefined behavior by not recording smaller
3909 maximum number of iterations. */
3910 if (loop
->nb_iterations
3911 && TREE_CODE (loop
->nb_iterations
) == INTEGER_CST
)
3913 loop
->any_upper_bound
= true;
3914 loop
->nb_iterations_upper_bound
= wi::to_widest (loop
->nb_iterations
);
3918 /* Sets NIT to the estimated number of executions of the latch of the
3919 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
3920 large as the number of iterations. If we have no reliable estimate,
3921 the function returns false, otherwise returns true. */
3924 estimated_loop_iterations (struct loop
*loop
, widest_int
*nit
)
3926 /* When SCEV information is available, try to update loop iterations
3927 estimate. Otherwise just return whatever we recorded earlier. */
3928 if (scev_initialized_p ())
3929 estimate_numbers_of_iterations_loop (loop
);
3931 return (get_estimated_loop_iterations (loop
, nit
));
3934 /* Similar to estimated_loop_iterations, but returns the estimate only
3935 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3936 on the number of iterations of LOOP could not be derived, returns -1. */
3939 estimated_loop_iterations_int (struct loop
*loop
)
3942 HOST_WIDE_INT hwi_nit
;
3944 if (!estimated_loop_iterations (loop
, &nit
))
3947 if (!wi::fits_shwi_p (nit
))
3949 hwi_nit
= nit
.to_shwi ();
3951 return hwi_nit
< 0 ? -1 : hwi_nit
;
3955 /* Sets NIT to an upper bound for the maximum number of executions of the
3956 latch of the LOOP. If we have no reliable estimate, the function returns
3957 false, otherwise returns true. */
3960 max_loop_iterations (struct loop
*loop
, widest_int
*nit
)
3962 /* When SCEV information is available, try to update loop iterations
3963 estimate. Otherwise just return whatever we recorded earlier. */
3964 if (scev_initialized_p ())
3965 estimate_numbers_of_iterations_loop (loop
);
3967 return get_max_loop_iterations (loop
, nit
);
3970 /* Similar to max_loop_iterations, but returns the estimate only
3971 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
3972 on the number of iterations of LOOP could not be derived, returns -1. */
3975 max_loop_iterations_int (struct loop
*loop
)
3978 HOST_WIDE_INT hwi_nit
;
3980 if (!max_loop_iterations (loop
, &nit
))
3983 if (!wi::fits_shwi_p (nit
))
3985 hwi_nit
= nit
.to_shwi ();
3987 return hwi_nit
< 0 ? -1 : hwi_nit
;
3990 /* Sets NIT to an likely upper bound for the maximum number of executions of the
3991 latch of the LOOP. If we have no reliable estimate, the function returns
3992 false, otherwise returns true. */
3995 likely_max_loop_iterations (struct loop
*loop
, widest_int
*nit
)
3997 /* When SCEV information is available, try to update loop iterations
3998 estimate. Otherwise just return whatever we recorded earlier. */
3999 if (scev_initialized_p ())
4000 estimate_numbers_of_iterations_loop (loop
);
4002 return get_likely_max_loop_iterations (loop
, nit
);
4005 /* Similar to max_loop_iterations, but returns the estimate only
4006 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
4007 on the number of iterations of LOOP could not be derived, returns -1. */
4010 likely_max_loop_iterations_int (struct loop
*loop
)
4013 HOST_WIDE_INT hwi_nit
;
4015 if (!likely_max_loop_iterations (loop
, &nit
))
4018 if (!wi::fits_shwi_p (nit
))
4020 hwi_nit
= nit
.to_shwi ();
4022 return hwi_nit
< 0 ? -1 : hwi_nit
;
4025 /* Returns an estimate for the number of executions of statements
4026 in the LOOP. For statements before the loop exit, this exceeds
4027 the number of execution of the latch by one. */
4030 estimated_stmt_executions_int (struct loop
*loop
)
4032 HOST_WIDE_INT nit
= estimated_loop_iterations_int (loop
);
4038 snit
= (HOST_WIDE_INT
) ((unsigned HOST_WIDE_INT
) nit
+ 1);
4040 /* If the computation overflows, return -1. */
4041 return snit
< 0 ? -1 : snit
;
4044 /* Sets NIT to the maximum number of executions of the latch of the
4045 LOOP, plus one. If we have no reliable estimate, the function returns
4046 false, otherwise returns true. */
4049 max_stmt_executions (struct loop
*loop
, widest_int
*nit
)
4051 widest_int nit_minus_one
;
4053 if (!max_loop_iterations (loop
, nit
))
4056 nit_minus_one
= *nit
;
4060 return wi::gtu_p (*nit
, nit_minus_one
);
4063 /* Sets NIT to the estimated maximum number of executions of the latch of the
4064 LOOP, plus one. If we have no likely estimate, the function returns
4065 false, otherwise returns true. */
4068 likely_max_stmt_executions (struct loop
*loop
, widest_int
*nit
)
4070 widest_int nit_minus_one
;
4072 if (!likely_max_loop_iterations (loop
, nit
))
4075 nit_minus_one
= *nit
;
4079 return wi::gtu_p (*nit
, nit_minus_one
);
4082 /* Sets NIT to the estimated number of executions of the latch of the
4083 LOOP, plus one. If we have no reliable estimate, the function returns
4084 false, otherwise returns true. */
4087 estimated_stmt_executions (struct loop
*loop
, widest_int
*nit
)
4089 widest_int nit_minus_one
;
4091 if (!estimated_loop_iterations (loop
, nit
))
4094 nit_minus_one
= *nit
;
4098 return wi::gtu_p (*nit
, nit_minus_one
);
4101 /* Records estimates on numbers of iterations of loops. */
4104 estimate_numbers_of_iterations (void)
4108 /* We don't want to issue signed overflow warnings while getting
4109 loop iteration estimates. */
4110 fold_defer_overflow_warnings ();
4112 FOR_EACH_LOOP (loop
, 0)
4114 estimate_numbers_of_iterations_loop (loop
);
4117 fold_undefer_and_ignore_overflow_warnings ();
/* NOTE(review): lossy extraction -- the early-return handling for
   distinct/identical blocks (original lines 4126-4132) is missing here.
   Tokens preserved verbatim; verify against upstream before editing.  */
4120 /* Returns true if statement S1 dominates statement S2. */
4123 stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
4125 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
4133 gimple_stmt_iterator bsi
;
/* Within a single block, PHIs come first; otherwise scan forward from the
   block start to see whether S1 precedes S2.  */
4135 if (gimple_code (s2
) == GIMPLE_PHI
)
4138 if (gimple_code (s1
) == GIMPLE_PHI
)
4141 for (bsi
= gsi_start_bb (bb1
); gsi_stmt (bsi
) != s2
; gsi_next (&bsi
))
4142 if (gsi_stmt (bsi
) == s1
)
/* Different blocks: defer to the dominator tree.  */
4148 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
/* NOTE(review): lossy extraction -- parameter list tail, the CMP selection
   and bound adjustment lines are missing.  Tokens preserved verbatim;
   verify against upstream tree-ssa-loop-niter.c before editing.  */
4151 /* Returns true when we can prove that the number of executions of
4152 STMT in the loop is at most NITER, according to the bound on
4153 the number of executions of the statement NITER_BOUND->stmt recorded in
4154 NITER_BOUND and fact that NITER_BOUND->stmt dominate STMT.
4156 ??? This code can become quite a CPU hog - we can have many bounds,
4157 and large basic block forcing stmt_dominates_stmt_p to be queried
4158 many times on a large basic blocks, so the whole thing is O(n^2)
4159 for scev_probably_wraps_p invocation (that can be done n times).
4161 It would make more sense (and give better answers) to remember BB
4162 bounds computed by discover_iteration_bound_by_body_walk. */
4165 n_of_executions_at_most (gimple
*stmt
,
4166 struct nb_iter_bound
*niter_bound
,
4169 widest_int bound
= niter_bound
->bound
;
4170 tree nit_type
= TREE_TYPE (niter
), e
;
4173 gcc_assert (TYPE_UNSIGNED (nit_type
));
4175 /* If the bound does not even fit into NIT_TYPE, it cannot tell us that
4176 the number of iterations is small. */
4177 if (!wi::fits_to_tree_p (bound
, nit_type
))
4180 /* We know that NITER_BOUND->stmt is executed at most NITER_BOUND->bound + 1
4181 times. This means that:
4183 -- if NITER_BOUND->is_exit is true, then everything after
4184 it at most NITER_BOUND->bound times.
4186 -- If NITER_BOUND->is_exit is false, then if we can prove that when STMT
4187 is executed, then NITER_BOUND->stmt is executed as well in the same
4188 iteration then STMT is executed at most NITER_BOUND->bound + 1 times.
4190 If we can determine that NITER_BOUND->stmt is always executed
4191 after STMT, then STMT is executed at most NITER_BOUND->bound + 2 times.
4192 We conclude that if both statements belong to the same
4193 basic block and STMT is before NITER_BOUND->stmt and there are no
4194 statements with side effects in between. */
4196 if (niter_bound
->is_exit
)
4198 if (stmt
== niter_bound
->stmt
4199 || !stmt_dominates_stmt_p (niter_bound
->stmt
, stmt
))
4205 if (!stmt_dominates_stmt_p (niter_bound
->stmt
, stmt
))
4207 gimple_stmt_iterator bsi
;
4208 if (gimple_bb (stmt
) != gimple_bb (niter_bound
->stmt
)
4209 || gimple_code (stmt
) == GIMPLE_PHI
4210 || gimple_code (niter_bound
->stmt
) == GIMPLE_PHI
)
4213 /* By stmt_dominates_stmt_p we already know that STMT appears
4214 before NITER_BOUND->STMT. Still need to test that the loop
4215 can not be terminated by a side effect in between. */
4216 for (bsi
= gsi_for_stmt (stmt
); gsi_stmt (bsi
) != niter_bound
->stmt
;
4218 if (gimple_has_side_effects (gsi_stmt (bsi
)))
4222 || !wi::fits_to_tree_p (bound
, nit_type
))
/* Finally fold the comparison of NITER against the (possibly adjusted)
   bound; only a provably-true fold result counts.  */
4228 e
= fold_binary (cmp
, boolean_type_node
,
4229 niter
, wide_int_to_tree (nit_type
, bound
));
4230 return e
&& integer_nonzerop (e
);
4233 /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
4236 nowrap_type_p (tree type
)
4238 if (ANY_INTEGRAL_TYPE_P (type
)
4239 && TYPE_OVERFLOW_UNDEFINED (type
))
4242 if (POINTER_TYPE_P (type
))
/* NOTE(review): lossy extraction -- the NITER declaration, several returns
   and else-branches are missing.  Tokens preserved verbatim; verify
   against upstream tree-ssa-loop-niter.c before editing.  */
4248 /* Return true if we can prove LOOP is exited before evolution of induction
4249 variable {BASE, STEP} overflows with respect to its type bound. */
4252 loop_exits_before_overflow (tree base
, tree step
,
4253 gimple
*at_stmt
, struct loop
*loop
)
4256 struct control_iv
*civ
;
4257 struct nb_iter_bound
*bound
;
4258 tree e
, delta
, step_abs
, unsigned_base
;
4259 tree type
= TREE_TYPE (step
);
4260 tree unsigned_type
, valid_niter
;
4262 /* Don't issue signed overflow warnings. */
4263 fold_defer_overflow_warnings ();
4265 /* Compute the number of iterations before we reach the bound of the
4266 type, and verify that the loop is exited before this occurs. */
4267 unsigned_type
= unsigned_type_for (type
);
4268 unsigned_base
= fold_convert (unsigned_type
, base
);
4270 if (tree_int_cst_sign_bit (step
))
4272 tree extreme
= fold_convert (unsigned_type
,
4273 lower_bound_in_type (type
, type
));
4274 delta
= fold_build2 (MINUS_EXPR
, unsigned_type
, unsigned_base
, extreme
);
4275 step_abs
= fold_build1 (NEGATE_EXPR
, unsigned_type
,
4276 fold_convert (unsigned_type
, step
));
4280 tree extreme
= fold_convert (unsigned_type
,
4281 upper_bound_in_type (type
, type
));
4282 delta
= fold_build2 (MINUS_EXPR
, unsigned_type
, extreme
, unsigned_base
);
4283 step_abs
= fold_convert (unsigned_type
, step
);
/* VALID_NITER is the iteration count that keeps the IV within its type.  */
4286 valid_niter
= fold_build2 (FLOOR_DIV_EXPR
, unsigned_type
, delta
, step_abs
);
4288 estimate_numbers_of_iterations_loop (loop
);
4290 if (max_loop_iterations (loop
, &niter
)
4291 && wi::fits_to_tree_p (niter
, TREE_TYPE (valid_niter
))
4292 && (e
= fold_binary (GT_EXPR
, boolean_type_node
, valid_niter
,
4293 wide_int_to_tree (TREE_TYPE (valid_niter
),
4295 && integer_nonzerop (e
))
4297 fold_undefer_and_ignore_overflow_warnings ();
/* Also try each recorded per-statement bound.  */
4301 for (bound
= loop
->bounds
; bound
; bound
= bound
->next
)
4303 if (n_of_executions_at_most (at_stmt
, bound
, valid_niter
))
4305 fold_undefer_and_ignore_overflow_warnings ();
4309 fold_undefer_and_ignore_overflow_warnings ();
4311 /* Try to prove loop is exited before {base, step} overflows with the
4312 help of analyzed loop control IV. This is done only for IVs with
4313 constant step because otherwise we don't have the information. */
4314 if (TREE_CODE (step
) == INTEGER_CST
)
4316 for (civ
= loop
->control_ivs
; civ
; civ
= civ
->next
)
4318 enum tree_code code
;
4319 tree civ_type
= TREE_TYPE (civ
->step
);
4321 /* Have to consider type difference because operand_equal_p ignores
4322 that for constants. */
4323 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (civ_type
)
4324 || element_precision (type
) != element_precision (civ_type
))
4327 /* Only consider control IV with same step. */
4328 if (!operand_equal_p (step
, civ
->step
, 0))
4331 /* Done proving if this is a no-overflow control IV. */
4332 if (operand_equal_p (base
, civ
->base
, 0))
4335 /* Control IV is recorded after expanding simple operations,
4336 Here we expand base and compare it too. */
4337 tree expanded_base
= expand_simple_operations (base
);
4338 if (operand_equal_p (expanded_base
, civ
->base
, 0))
4341 /* If this is a before stepping control IV, in other words, we have
4343 {civ_base, step} = {base + step, step}
4345 Because civ {base + step, step} doesn't overflow during loop
4346 iterations, {base, step} will not overflow if we can prove the
4347 operation "base + step" does not overflow. Specifically, we try
4348 to prove below conditions are satisfied:
4350 base <= UPPER_BOUND (type) - step ;;step > 0
4351 base >= LOWER_BOUND (type) - step ;;step < 0
4353 by proving the reverse conditions are false using loop's initial
4355 if (POINTER_TYPE_P (TREE_TYPE (base
)))
4356 code
= POINTER_PLUS_EXPR
;
4360 tree stepped
= fold_build2 (code
, TREE_TYPE (base
), base
, step
);
4361 tree expanded_stepped
= fold_build2 (code
, TREE_TYPE (base
),
4362 expanded_base
, step
);
4363 if (operand_equal_p (stepped
, civ
->base
, 0)
4364 || operand_equal_p (expanded_stepped
, civ
->base
, 0))
4368 if (tree_int_cst_sign_bit (step
))
4371 extreme
= lower_bound_in_type (type
, type
);
4376 extreme
= upper_bound_in_type (type
, type
);
4378 extreme
= fold_build2 (MINUS_EXPR
, type
, extreme
, step
);
4379 e
= fold_build2 (code
, boolean_type_node
, base
, extreme
);
4380 e
= simplify_using_initial_conditions (loop
, e
);
4381 if (integer_zerop (e
))
/* NOTE(review): lossy extraction -- the step_wi initialization and the
   early returns are missing.  Tokens preserved verbatim; verify against
   upstream tree-ssa-loop-niter.c before editing.  */
4390 /* VAR is scev variable whose evolution part is constant STEP, this function
4391 proves that VAR can't overflow by using value range info. If VAR's value
4392 range is [MIN, MAX], it can be proven by:
4393 MAX + step doesn't overflow ; if step > 0
4395 MIN + step doesn't underflow ; if step < 0.
4397 We can only do this if var is computed in every loop iteration, i.e, var's
4398 definition has to dominate loop latch. Consider below example:
4406 # RANGE [0, 4294967294] NONZERO 65535
4407 # i_21 = PHI <0(3), i_18(9)>
4414 # RANGE [0, 65533] NONZERO 65535
4415 _6 = i_21 + 4294967295;
4416 # RANGE [0, 65533] NONZERO 65535
4417 _7 = (long unsigned int) _6;
4418 # RANGE [0, 524264] NONZERO 524280
4420 # PT = nonlocal escaped
4425 # RANGE [1, 65535] NONZERO 65535
4439 VAR _6 doesn't overflow only with pre-condition (i_21 != 0), here we
4440 can't use _6 to prove no-overflow for _7. In fact, var _7 takes value
4441 sequence (4294967295, 0, 1, ..., 65533) in loop life time, rather than
4442 (4294967295, 4294967296, ...). */
4445 scev_var_range_cant_overflow (tree var
, tree step
, struct loop
*loop
)
4448 wide_int minv
, maxv
, diff
, step_wi
;
4449 enum value_range_type rtype
;
4451 if (TREE_CODE (step
) != INTEGER_CST
|| !INTEGRAL_TYPE_P (TREE_TYPE (var
)))
4454 /* Check if VAR evaluates in every loop iteration. It's not the case
4455 if VAR is default definition or does not dominate loop's latch. */
4456 basic_block def_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
4457 if (!def_bb
|| !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, def_bb
))
4460 rtype
= get_range_info (var
, &minv
, &maxv
);
4461 if (rtype
!= VR_RANGE
)
4464 /* VAR is a scev whose evolution part is STEP and value range info
4465 is [MIN, MAX], we can prove its no-overflowness by conditions:
4467 type_MAX - MAX >= step ; if step > 0
4468 MIN - type_MIN >= |step| ; if step < 0.
4470 Or VAR must take value outside of value range, which is not true. */
4472 type
= TREE_TYPE (var
);
4473 if (tree_int_cst_sign_bit (step
))
4475 diff
= lower_bound_in_type (type
, type
);
4477 step_wi
= - step_wi
;
4481 diff
= upper_bound_in_type (type
, type
);
/* DIFF now holds the headroom between the range bound and the type bound;
   overflow is impossible when it covers the (absolute) step.  */
4485 return (wi::geu_p (diff
, step_wi
));
4488 /* Return false only when the induction variable BASE + STEP * I is
4489 known to not overflow: i.e. when the number of iterations is small
4490 enough with respect to the step and initial condition in order to
4491 keep the evolution confined in TYPEs bounds. Return true when the
4492 iv is known to overflow or when the property is not computable.
4494 USE_OVERFLOW_SEMANTICS is true if this function should assume that
4495 the rules for overflow of the given language apply (e.g., that signed
4496 arithmetics in C does not overflow).
4498 If VAR is a ssa variable, this function also returns false if VAR can
4499 be proven not overflow with value range info. */
4502 scev_probably_wraps_p (tree var
, tree base
, tree step
,
4503 gimple
*at_stmt
, struct loop
*loop
,
4504 bool use_overflow_semantics
)
4506 /* FIXME: We really need something like
4507 http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
4509 We used to test for the following situation that frequently appears
4510 during address arithmetics:
4512 D.1621_13 = (long unsigned intD.4) D.1620_12;
4513 D.1622_14 = D.1621_13 * 8;
4514 D.1623_15 = (doubleD.29 *) D.1622_14;
4516 And derived that the sequence corresponding to D_14
4517 can be proved to not wrap because it is used for computing a
4518 memory access; however, this is not really the case -- for example,
4519 if D_12 = (unsigned char) [254,+,1], then D_14 has values
4520 2032, 2040, 0, 8, ..., but the code is still legal. */
4522 if (chrec_contains_undetermined (base
)
4523 || chrec_contains_undetermined (step
))
4526 if (integer_zerop (step
))
4529 /* If we can use the fact that signed and pointer arithmetics does not
4530 wrap, we are done. */
4531 if (use_overflow_semantics
&& nowrap_type_p (TREE_TYPE (base
)))
4534 /* To be able to use estimates on number of iterations of the loop,
4535 we must have an upper bound on the absolute value of the step. */
4536 if (TREE_CODE (step
) != INTEGER_CST
)
4539 /* Check if var can be proven not overflow with value range info. */
4540 if (var
&& TREE_CODE (var
) == SSA_NAME
4541 && scev_var_range_cant_overflow (var
, step
, loop
))
4544 if (loop_exits_before_overflow (base
, step
, at_stmt
, loop
))
4547 /* At this point we still don't have a proof that the iv does not
4548 overflow: give up. */
/* NOTE(review): lossy extraction -- the statements that actually release
   each list node (original lines 4565-4566 and 4573-4574) are missing,
   so the deallocation mechanism cannot be confirmed from this view.
   Tokens preserved verbatim; verify against upstream before editing.  */
4552 /* Frees the information on upper bounds on numbers of iterations of LOOP. */
4555 free_numbers_of_iterations_estimates_loop (struct loop
*loop
)
4557 struct control_iv
*civ
;
4558 struct nb_iter_bound
*bound
;
4560 loop
->nb_iterations
= NULL
;
4561 loop
->estimate_state
= EST_NOT_COMPUTED
;
/* Walk and release the bound chain, saving NEXT before freeing.  */
4562 for (bound
= loop
->bounds
; bound
;)
4564 struct nb_iter_bound
*next
= bound
->next
;
4568 loop
->bounds
= NULL
;
/* Likewise for the recorded control IVs.  */
4570 for (civ
= loop
->control_ivs
; civ
;)
4572 struct control_iv
*next
= civ
->next
;
4576 loop
->control_ivs
= NULL
;
4579 /* Frees the information on upper bounds on numbers of iterations of loops. */
4582 free_numbers_of_iterations_estimates (function
*fn
)
4586 FOR_EACH_LOOP_FN (fn
, loop
, 0)
4588 free_numbers_of_iterations_estimates_loop (loop
);
4592 /* Substitute value VAL for ssa name NAME inside expressions held
4596 substitute_in_loop_info (struct loop
*loop
, tree name
, tree val
)
4598 loop
->nb_iterations
= simplify_replace_tree (loop
->nb_iterations
, name
, val
);