gccrs: dump: Fix module dumping
[official-gcc.git] / gcc / tree-scalar-evolution.cc
blob526b7f68768a9b6261b2c8d20d4abd0cd18a5036
1 /* Scalar evolution detector.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <s.pop@laposte.net>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 Description:
24 This pass analyzes the evolution of scalar variables in loop
25 structures. The algorithm is based on the SSA representation,
26 and on the loop hierarchy tree. This algorithm is not based on
27 the notion of versions of a variable, as it was the case for the
28 previous implementations of the scalar evolution algorithm, but
29 it assumes that each defined name is unique.
31 The notation used in this file is called "chains of recurrences",
32 and has been proposed by Eugene Zima, Robert Van Engelen, and
33 others for describing induction variables in programs. For example
34 "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0
35 when entering in the loop_1 and has a step 2 in this loop, in other
36 words "for (b = 0; b < N; b+=2);". Note that the coefficients of
37 this chain of recurrence (or chrec [shrek]) can contain the name of
38 other variables, in which case they are called parametric chrecs.
39 For example, "b -> {a, +, 2}_1" means that the initial value of "b"
40 is the value of "a". In most of the cases these parametric chrecs
41 are fully instantiated before their use because symbolic names can
42 hide some difficult cases such as self-references described later
43 (see the Fibonacci example).
45 A short sketch of the algorithm is:
47 Given a scalar variable to be analyzed, follow the SSA edge to
48 its definition:
50 - When the definition is a GIMPLE_ASSIGN: if the right hand side
51 (RHS) of the definition cannot be statically analyzed, the answer
52 of the analyzer is: "don't know".
53 Otherwise, for all the variables that are not yet analyzed in the
54 RHS, try to determine their evolution, and finally try to
55 evaluate the operation of the RHS that gives the evolution
56 function of the analyzed variable.
58 - When the definition is a condition-phi-node: determine the
59 evolution function for all the branches of the phi node, and
60 finally merge these evolutions (see chrec_merge).
62 - When the definition is a loop-phi-node: determine its initial
63 condition, that is the SSA edge defined in an outer loop, and
64 keep it symbolic. Then determine the SSA edges that are defined
65 in the body of the loop. Follow the inner edges until ending on
66 another loop-phi-node of the same analyzed loop. If the reached
67 loop-phi-node is not the starting loop-phi-node, then we keep
68 this definition under a symbolic form. If the reached
69 loop-phi-node is the same as the starting one, then we compute a
70 symbolic stride on the return path. The result is then the
71 symbolic chrec {initial_condition, +, symbolic_stride}_loop.
73 Examples:
75 Example 1: Illustration of the basic algorithm.
77 | a = 3
78 | loop_1
79 | b = phi (a, c)
80 | c = b + 1
81 | if (c > 10) exit_loop
82 | endloop
84 Suppose that we want to know the number of iterations of the
85 loop_1. The exit_loop is controlled by a COND_EXPR (c > 10). We
86 ask the scalar evolution analyzer two questions: what's the
87 scalar evolution (scev) of "c", and what's the scev of "10". For
88 "10" the answer is "10" since it is a scalar constant. For the
89 scalar variable "c", it follows the SSA edge to its definition,
90 "c = b + 1", and then asks again what's the scev of "b".
91 Following the SSA edge, we end on a loop-phi-node "b = phi (a,
92 c)", where the initial condition is "a", and the inner loop edge
93 is "c". The initial condition is kept under a symbolic form (it
94 may be the case that the copy constant propagation has done its
95 work and we end with the constant "3" as one of the edges of the
96 loop-phi-node). The update edge is followed to the end of the
97 loop, and until reaching again the starting loop-phi-node: b -> c
98 -> b. At this point we have drawn a path from "b" to "b" from
99 which we compute the stride in the loop: in this example it is
100 "+1". The resulting scev for "b" is "b -> {a, +, 1}_1". Now
101 that the scev for "b" is known, it is possible to compute the
102 scev for "c", that is "c -> {a + 1, +, 1}_1". In order to
103 determine the number of iterations in the loop_1, we have to
104 instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some
105 more analysis the scev {4, +, 1}_1, or in other words, this is
106 the function "f (x) = x + 4", where x is the iteration count of
107 the loop_1. Now we have to solve the inequality "x + 4 > 10",
108 and take the smallest iteration number for which the loop is
109 exited: x = 7. This loop runs from x = 0 to x = 7, and in total
110 there are 8 iterations. In terms of loop normalization, we have
111 created a variable that is implicitly defined, "x" or just "_1",
112 and all the other analyzed scalars of the loop are defined in
113 function of this variable:
115 a -> 3
116 b -> {3, +, 1}_1
117 c -> {4, +, 1}_1
119 or in terms of a C program:
121 | a = 3
122 | for (x = 0; x <= 7; x++)
124 | b = x + 3
125 | c = x + 4
128 Example 2a: Illustration of the algorithm on nested loops.
130 | loop_1
131 | a = phi (1, b)
132 | c = a + 2
133 | loop_2 10 times
134 | b = phi (c, d)
135 | d = b + 3
136 | endloop
137 | endloop
139 For analyzing the scalar evolution of "a", the algorithm follows
140 the SSA edge into the loop's body: "a -> b". "b" is an inner
141 loop-phi-node, and its analysis as in Example 1, gives:
143 b -> {c, +, 3}_2
144 d -> {c + 3, +, 3}_2
146 Following the SSA edge for the initial condition, we end on "c = a
147 + 2", and then on the starting loop-phi-node "a". From this point,
148 the loop stride is computed: back on "c = a + 2" we get a "+2" in
149 the loop_1, then on the loop-phi-node "b" we compute the overall
150 effect of the inner loop that is "b = c + 30", and we get a "+30"
151 in the loop_1. That means that the overall stride in loop_1 is
152 equal to "+32", and the result is:
154 a -> {1, +, 32}_1
155 c -> {3, +, 32}_1
157 Example 2b: Multivariate chains of recurrences.
159 | loop_1
160 | k = phi (0, k + 1)
161 | loop_2 4 times
162 | j = phi (0, j + 1)
163 | loop_3 4 times
164 | i = phi (0, i + 1)
165 | A[j + k] = ...
166 | endloop
167 | endloop
168 | endloop
170 Analyzing the access function of array A with
171 instantiate_parameters (loop_1, "j + k"), we obtain the
172 instantiation and the analysis of the scalar variables "j" and "k"
173 in loop_1. This leads to the scalar evolution {4, +, 1}_1: the end
174 value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is
175 {0, +, 1}_1. To obtain the evolution function in loop_3 and
176 instantiate the scalar variables up to loop_1, one has to use:
177 instantiate_scev (block_before_loop (loop_1), loop_3, "j + k").
178 The result of this call is {{0, +, 1}_1, +, 1}_2.
180 Example 3: Higher degree polynomials.
182 | loop_1
183 | a = phi (2, b)
184 | c = phi (5, d)
185 | b = a + 1
186 | d = c + a
187 | endloop
189 a -> {2, +, 1}_1
190 b -> {3, +, 1}_1
191 c -> {5, +, a}_1
192 d -> {5 + a, +, a}_1
194 instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1
195 instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1
197 Example 4: Lucas, Fibonacci, or mixers in general.
199 | loop_1
200 | a = phi (1, b)
201 | c = phi (3, d)
202 | b = c
203 | d = c + a
204 | endloop
206 a -> (1, c)_1
207 c -> {3, +, a}_1
209 The syntax "(1, c)_1" stands for a PEELED_CHREC that has the
210 following semantics: during the first iteration of the loop_1, the
211 variable contains the value 1, and then it contains the value "c".
212 Note that this syntax is close to the syntax of the loop-phi-node:
213 "a -> (1, c)_1" vs. "a = phi (1, c)".
215 The symbolic chrec representation contains all the semantics of the
216 original code. What is more difficult is to use this information.
218 Example 5: Flip-flops, or exchangers.
220 | loop_1
221 | a = phi (1, b)
222 | c = phi (3, d)
223 | b = c
224 | d = a
225 | endloop
227 a -> (1, c)_1
228 c -> (3, a)_1
230 Based on these symbolic chrecs, it is possible to refine this
231 information into the more precise PERIODIC_CHRECs:
233 a -> |1, 3|_1
234 c -> |3, 1|_1
236 This transformation is not yet implemented.
238 Further readings:
240 You can find a more detailed description of the algorithm in:
241 http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf
242 http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz. But note that
243 this is a preliminary report and some of the details of the
244 algorithm have changed. I'm working on a research report that
245 updates the description of the algorithms to reflect the design
246 choices used in this implementation.
248 A set of slides show a high level overview of the algorithm and run
249 an example through the scalar evolution analyzer:
250 http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf
252 The slides that I have presented at the GCC Summit'04 are available
253 at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf
256 #include "config.h"
257 #include "system.h"
258 #include "coretypes.h"
259 #include "backend.h"
260 #include "target.h"
261 #include "rtl.h"
262 #include "optabs-query.h"
263 #include "tree.h"
264 #include "gimple.h"
265 #include "ssa.h"
266 #include "gimple-pretty-print.h"
267 #include "fold-const.h"
268 #include "gimplify.h"
269 #include "gimple-iterator.h"
270 #include "gimplify-me.h"
271 #include "tree-cfg.h"
272 #include "tree-ssa-loop-ivopts.h"
273 #include "tree-ssa-loop-manip.h"
274 #include "tree-ssa-loop-niter.h"
275 #include "tree-ssa-loop.h"
276 #include "tree-ssa.h"
277 #include "cfgloop.h"
278 #include "tree-chrec.h"
279 #include "tree-affine.h"
280 #include "tree-scalar-evolution.h"
281 #include "dumpfile.h"
282 #include "tree-ssa-propagate.h"
283 #include "gimple-fold.h"
284 #include "tree-into-ssa.h"
285 #include "builtins.h"
286 #include "case-cfn-macros.h"
288 static tree analyze_scalar_evolution_1 (class loop *, tree);
289 static tree analyze_scalar_evolution_for_address_of (class loop *loop,
290 tree var);
292 /* The cached information about an SSA name with version NAME_VERSION,
293 claiming that below basic block with index INSTANTIATED_BELOW, the
294 value of the SSA name can be expressed as CHREC. */
296 struct GTY((for_user)) scev_info_str {
297 unsigned int name_version;
298 int instantiated_below;
299 tree chrec;
302 /* Counters for the scev database. */
303 static unsigned nb_set_scev = 0;
304 static unsigned nb_get_scev = 0;
306 struct scev_info_hasher : ggc_ptr_hash<scev_info_str>
308 static hashval_t hash (scev_info_str *i);
309 static bool equal (const scev_info_str *a, const scev_info_str *b);
312 static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info;
315 /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */
317 static inline struct scev_info_str *
318 new_scev_info_str (basic_block instantiated_below, tree var)
320 struct scev_info_str *res;
322 res = ggc_alloc<scev_info_str> ();
323 res->name_version = SSA_NAME_VERSION (var);
324 res->chrec = chrec_not_analyzed_yet;
325 res->instantiated_below = instantiated_below->index;
327 return res;
330 /* Computes a hash function for database element ELT. */
332 hashval_t
333 scev_info_hasher::hash (scev_info_str *elt)
335 return elt->name_version ^ elt->instantiated_below;
338 /* Compares database elements E1 and E2. */
340 bool
341 scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2)
343 return (elt1->name_version == elt2->name_version
344 && elt1->instantiated_below == elt2->instantiated_below);
347 /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block.
348 A first query on VAR returns chrec_not_analyzed_yet. */
350 static tree *
351 find_var_scev_info (basic_block instantiated_below, tree var)
353 struct scev_info_str *res;
354 struct scev_info_str tmp;
356 tmp.name_version = SSA_NAME_VERSION (var);
357 tmp.instantiated_below = instantiated_below->index;
358 scev_info_str **slot = scalar_evolution_info->find_slot (&tmp, INSERT);
360 if (!*slot)
361 *slot = new_scev_info_str (instantiated_below, var);
362 res = *slot;
364 return &res->chrec;
368 /* Hashtable helpers for a temporary hash-table used when
369 analyzing a scalar evolution, instantiating a CHREC or
370 resolving mixers. */
372 class instantiate_cache_type
374 public:
375 htab_t map;
376 vec<scev_info_str> entries;
378 instantiate_cache_type () : map (NULL), entries (vNULL) {}
379 ~instantiate_cache_type ();
380 tree get (unsigned slot) { return entries[slot].chrec; }
381 void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; }
384 instantiate_cache_type::~instantiate_cache_type ()
386 if (map != NULL)
388 htab_delete (map);
389 entries.release ();
393 /* Cache to avoid infinite recursion when instantiating an SSA name.
394 Live during the outermost analyze_scalar_evolution, instantiate_scev
395 or resolve_mixers call. */
396 static instantiate_cache_type *global_cache;
399 /* Return true when PHI is a loop-phi-node. */
401 static bool
402 loop_phi_node_p (gimple *phi)
404 /* The implementation of this function is based on the following
405 property: "all the loop-phi-nodes of a loop are contained in the
406 loop's header basic block". */
408 return loop_containing_stmt (phi)->header == gimple_bb (phi);
411 /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP.
412 In general, in the case of multivariate evolutions we want to get
413 the evolution in different loops. LOOP specifies the level for
414 which to get the evolution.
416 Example:
418 | for (j = 0; j < 100; j++)
420 | for (k = 0; k < 100; k++)
422 | i = k + j; - Here the value of i is a function of j, k.
424 | ... = i - Here the value of i is a function of j.
426 | ... = i - Here the value of i is a scalar.
428 Example:
430 | i_0 = ...
431 | loop_1 10 times
432 | i_1 = phi (i_0, i_2)
433 | i_2 = i_1 + 2
434 | endloop
436 This loop has the same effect as:
437 LOOP_1 has the same effect as:
439 | i_1 = i_0 + 20
441 The overall effect of the loop, "i_0 + 20" in the previous example,
442 is obtained by passing in the parameters: LOOP = 1,
443 EVOLUTION_FN = {i_0, +, 2}_1.
446 tree
447 compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn)
449 bool val = false;
451 if (evolution_fn == chrec_dont_know)
452 return chrec_dont_know;
454 else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC)
456 class loop *inner_loop = get_chrec_loop (evolution_fn);
458 if (inner_loop == loop
459 || flow_loop_nested_p (loop, inner_loop))
461 tree nb_iter = number_of_latch_executions (inner_loop);
463 if (nb_iter == chrec_dont_know)
464 return chrec_dont_know;
465 else
467 tree res;
469 /* evolution_fn is the evolution function in LOOP. Get
470 its value in the nb_iter-th iteration. */
471 res = chrec_apply (inner_loop->num, evolution_fn, nb_iter);
473 if (chrec_contains_symbols_defined_in_loop (res, loop->num))
474 res = instantiate_parameters (loop, res);
476 /* Continue the computation until ending on a parent of LOOP. */
477 return compute_overall_effect_of_inner_loop (loop, res);
480 else
481 return evolution_fn;
484 /* If the evolution function is an invariant, there is nothing to do. */
485 else if (no_evolution_in_loop_p (evolution_fn, loop->num, &val) && val)
486 return evolution_fn;
488 else
489 return chrec_dont_know;
492 /* Associate CHREC to SCALAR. */
494 static void
495 set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec)
497 tree *scalar_info;
499 if (TREE_CODE (scalar) != SSA_NAME)
500 return;
502 scalar_info = find_var_scev_info (instantiated_below, scalar);
504 if (dump_file)
506 if (dump_flags & TDF_SCEV)
508 fprintf (dump_file, "(set_scalar_evolution \n");
509 fprintf (dump_file, " instantiated_below = %d \n",
510 instantiated_below->index);
511 fprintf (dump_file, " (scalar = ");
512 print_generic_expr (dump_file, scalar);
513 fprintf (dump_file, ")\n (scalar_evolution = ");
514 print_generic_expr (dump_file, chrec);
515 fprintf (dump_file, "))\n");
517 if (dump_flags & TDF_STATS)
518 nb_set_scev++;
521 *scalar_info = chrec;
524 /* Retrieve the chrec associated to SCALAR instantiated below
525 INSTANTIATED_BELOW block. */
527 static tree
528 get_scalar_evolution (basic_block instantiated_below, tree scalar)
530 tree res;
532 if (dump_file)
534 if (dump_flags & TDF_SCEV)
536 fprintf (dump_file, "(get_scalar_evolution \n");
537 fprintf (dump_file, " (scalar = ");
538 print_generic_expr (dump_file, scalar);
539 fprintf (dump_file, ")\n");
541 if (dump_flags & TDF_STATS)
542 nb_get_scev++;
545 if (VECTOR_TYPE_P (TREE_TYPE (scalar))
546 || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE)
547 /* For chrec_dont_know we keep the symbolic form. */
548 res = scalar;
549 else
550 switch (TREE_CODE (scalar))
552 case SSA_NAME:
553 if (SSA_NAME_IS_DEFAULT_DEF (scalar))
554 res = scalar;
555 else
556 res = *find_var_scev_info (instantiated_below, scalar);
557 break;
559 case REAL_CST:
560 case FIXED_CST:
561 case INTEGER_CST:
562 res = scalar;
563 break;
565 default:
566 res = chrec_not_analyzed_yet;
567 break;
570 if (dump_file && (dump_flags & TDF_SCEV))
572 fprintf (dump_file, " (scalar_evolution = ");
573 print_generic_expr (dump_file, res);
574 fprintf (dump_file, "))\n");
577 return res;
581 /* Depth first search algorithm. */
583 enum t_bool {
584 t_false,
585 t_true,
586 t_dont_know
589 class scev_dfs
591 public:
592 scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_)
593 : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {}
594 t_bool get_ev (tree *, tree);
596 private:
597 t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int);
598 t_bool follow_ssa_edge_binary (gimple *at_stmt,
599 tree type, tree rhs0, enum tree_code code,
600 tree rhs1, tree *evolution_of_loop, int limit);
601 t_bool follow_ssa_edge_in_condition_phi_branch (int i,
602 gphi *condition_phi,
603 tree *evolution_of_branch,
604 tree init_cond, int limit);
605 t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi,
606 tree *evolution_of_loop, int limit);
607 t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
608 tree *evolution_of_loop, int limit);
609 tree add_to_evolution (tree chrec_before, enum tree_code code,
610 tree to_add, gimple *at_stmt);
611 tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt);
613 class loop *loop;
614 gphi *loop_phi_node;
615 tree init_cond;
618 t_bool
619 scev_dfs::get_ev (tree *ev_fn, tree arg)
621 *ev_fn = chrec_dont_know;
622 return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0);
625 /* Helper function for add_to_evolution. Returns the evolution
626 function for an assignment of the form "a = b + c", where "a" and
627 "b" are on the strongly connected component. CHREC_BEFORE is the
628 information that we already have collected up to this point.
629 TO_ADD is the evolution of "c".
631 When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
632 evolution the expression TO_ADD, otherwise construct an evolution
633 part for this loop. */
635 tree
636 scev_dfs::add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt)
638 tree type, left, right;
639 unsigned loop_nb = loop->num;
640 class loop *chloop;
642 switch (TREE_CODE (chrec_before))
644 case POLYNOMIAL_CHREC:
645 chloop = get_chrec_loop (chrec_before);
646 if (chloop == loop
647 || flow_loop_nested_p (chloop, loop))
649 unsigned var;
651 type = chrec_type (chrec_before);
653 /* When there is no evolution part in this loop, build it. */
654 if (chloop != loop)
656 var = loop_nb;
657 left = chrec_before;
658 right = SCALAR_FLOAT_TYPE_P (type)
659 ? build_real (type, dconst0)
660 : build_int_cst (type, 0);
662 else
664 var = CHREC_VARIABLE (chrec_before);
665 left = CHREC_LEFT (chrec_before);
666 right = CHREC_RIGHT (chrec_before);
669 to_add = chrec_convert (type, to_add, at_stmt);
670 right = chrec_convert_rhs (type, right, at_stmt);
671 right = chrec_fold_plus (chrec_type (right), right, to_add);
672 return build_polynomial_chrec (var, left, right);
674 else
676 gcc_assert (flow_loop_nested_p (loop, chloop));
678 /* Search the evolution in LOOP_NB. */
679 left = add_to_evolution_1 (CHREC_LEFT (chrec_before),
680 to_add, at_stmt);
681 right = CHREC_RIGHT (chrec_before);
682 right = chrec_convert_rhs (chrec_type (left), right, at_stmt);
683 return build_polynomial_chrec (CHREC_VARIABLE (chrec_before),
684 left, right);
687 default:
688 /* These nodes do not depend on a loop. */
689 if (chrec_before == chrec_dont_know)
690 return chrec_dont_know;
692 left = chrec_before;
693 right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt);
694 /* When we add the first evolution we need to replace the symbolic
695 evolution we've put in when the DFS reached the loop PHI node
696 with the initial value. There's only a limited cases of
697 extra operations ontop of that symbol allowed, namely
698 sign-conversions we can look through. For other cases we leave
699 the symbolic initial condition which causes build_polynomial_chrec
700 to return chrec_dont_know. See PR42512, PR66375 and PR107176 for
701 cases we mishandled before. */
702 STRIP_NOPS (chrec_before);
703 if (chrec_before == gimple_phi_result (loop_phi_node))
704 left = fold_convert (TREE_TYPE (left), init_cond);
705 return build_polynomial_chrec (loop_nb, left, right);
709 /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
710 of LOOP_NB.
712 Description (provided for completeness, for those who read code in
713 a plane, and for my poor 62 bytes brain that would have forgotten
714 all this in the next two or three months):
716 The algorithm of translation of programs from the SSA representation
717 into the chrecs syntax is based on a pattern matching. After having
718 reconstructed the overall tree expression for a loop, there are only
719 two cases that can arise:
721 1. a = loop-phi (init, a + expr)
722 2. a = loop-phi (init, expr)
724 where EXPR is either a scalar constant with respect to the analyzed
725 loop (this is a degree 0 polynomial), or an expression containing
726 other loop-phi definitions (these are higher degree polynomials).
728 Examples:
731 | init = ...
732 | loop_1
733 | a = phi (init, a + 5)
734 | endloop
737 | inita = ...
738 | initb = ...
739 | loop_1
740 | a = phi (inita, 2 * b + 3)
741 | b = phi (initb, b + 1)
742 | endloop
744 For the first case, the semantics of the SSA representation is:
746 | a (x) = init + \sum_{j = 0}^{x - 1} expr (j)
748 that is, there is a loop index "x" that determines the scalar value
749 of the variable during the loop execution. During the first
750 iteration, the value is that of the initial condition INIT, while
751 during the subsequent iterations, it is the sum of the initial
752 condition with the sum of all the values of EXPR from the initial
753 iteration to the before last considered iteration.
755 For the second case, the semantics of the SSA program is:
757 | a (x) = init, if x = 0;
758 | expr (x - 1), otherwise.
760 The second case corresponds to the PEELED_CHREC, whose syntax is
761 close to the syntax of a loop-phi-node:
763 | phi (init, expr) vs. (init, expr)_x
765 The proof of the translation algorithm for the first case is a
766 proof by structural induction based on the degree of EXPR.
768 Degree 0:
769 When EXPR is a constant with respect to the analyzed loop, or in
770 other words when EXPR is a polynomial of degree 0, the evolution of
771 the variable A in the loop is an affine function with an initial
772 condition INIT, and a step EXPR. In order to show this, we start
773 from the semantics of the SSA representation:
775 f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
777 and since "expr (j)" is a constant with respect to "j",
779 f (x) = init + x * expr
781 Finally, based on the semantics of the pure sum chrecs, by
782 identification we get the corresponding chrecs syntax:
784 f (x) = init * \binom{x}{0} + expr * \binom{x}{1}
785 f (x) -> {init, +, expr}_x
787 Higher degree:
788 Suppose that EXPR is a polynomial of degree N with respect to the
789 analyzed loop_x for which we have already determined that it is
790 written under the chrecs syntax:
792 | expr (x) -> {b_0, +, b_1, +, ..., +, b_{n-1}} (x)
794 We start from the semantics of the SSA program:
796 | f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
798 | f (x) = init + \sum_{j = 0}^{x - 1}
799 | (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1})
801 | f (x) = init + \sum_{j = 0}^{x - 1}
802 | \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k})
804 | f (x) = init + \sum_{k = 0}^{n - 1}
805 | (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k})
807 | f (x) = init + \sum_{k = 0}^{n - 1}
808 | (b_k * \binom{x}{k + 1})
810 | f (x) = init + b_0 * \binom{x}{1} + ...
811 | + b_{n-1} * \binom{x}{n}
813 | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ...
814 | + b_{n-1} * \binom{x}{n}
817 And finally from the definition of the chrecs syntax, we identify:
818 | f (x) -> {init, +, b_0, +, ..., +, b_{n-1}}_x
820 This shows the mechanism that stands behind the add_to_evolution
821 function. An important point is that the use of symbolic
822 parameters avoids the need of an analysis schedule.
824 Example:
826 | inita = ...
827 | initb = ...
828 | loop_1
829 | a = phi (inita, a + 2 + b)
830 | b = phi (initb, b + 1)
831 | endloop
833 When analyzing "a", the algorithm keeps "b" symbolically:
835 | a -> {inita, +, 2 + b}_1
837 Then, after instantiation, the analyzer ends on the evolution:
839 | a -> {inita, +, 2 + initb, +, 1}_1
843 tree
844 scev_dfs::add_to_evolution (tree chrec_before, enum tree_code code,
845 tree to_add, gimple *at_stmt)
847 tree type = chrec_type (to_add);
848 tree res = NULL_TREE;
850 if (to_add == NULL_TREE)
851 return chrec_before;
853 /* TO_ADD is either a scalar, or a parameter. TO_ADD is not
854 instantiated at this point. */
855 if (TREE_CODE (to_add) == POLYNOMIAL_CHREC)
856 /* This should not happen. */
857 return chrec_dont_know;
859 if (dump_file && (dump_flags & TDF_SCEV))
861 fprintf (dump_file, "(add_to_evolution \n");
862 fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
863 fprintf (dump_file, " (chrec_before = ");
864 print_generic_expr (dump_file, chrec_before);
865 fprintf (dump_file, ")\n (to_add = ");
866 print_generic_expr (dump_file, to_add);
867 fprintf (dump_file, ")\n");
870 if (code == MINUS_EXPR)
871 to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type)
872 ? build_real (type, dconstm1)
873 : build_int_cst_type (type, -1));
875 res = add_to_evolution_1 (chrec_before, to_add, at_stmt);
877 if (dump_file && (dump_flags & TDF_SCEV))
879 fprintf (dump_file, " (res = ");
880 print_generic_expr (dump_file, res);
881 fprintf (dump_file, "))\n");
884 return res;
888 /* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
889 Return true if the strongly connected component has been found. */
891 t_bool
892 scev_dfs::follow_ssa_edge_binary (gimple *at_stmt, tree type, tree rhs0,
893 enum tree_code code, tree rhs1,
894 tree *evolution_of_loop, int limit)
896 t_bool res = t_false;
897 tree evol;
899 switch (code)
901 case POINTER_PLUS_EXPR:
902 case PLUS_EXPR:
903 if (TREE_CODE (rhs0) == SSA_NAME)
905 if (TREE_CODE (rhs1) == SSA_NAME)
907 /* Match an assignment under the form:
908 "a = b + c". */
910 /* We want only assignments of form "name + name" contribute to
911 LIMIT, as the other cases do not necessarily contribute to
912 the complexity of the expression. */
913 limit++;
915 evol = *evolution_of_loop;
916 res = follow_ssa_edge_expr (at_stmt, rhs0, &evol, limit);
917 if (res == t_true)
918 *evolution_of_loop = add_to_evolution
919 (chrec_convert (type, evol, at_stmt), code, rhs1, at_stmt);
920 else if (res == t_false)
922 res = follow_ssa_edge_expr
923 (at_stmt, rhs1, evolution_of_loop, limit);
924 if (res == t_true)
925 *evolution_of_loop = add_to_evolution
926 (chrec_convert (type, *evolution_of_loop, at_stmt),
927 code, rhs0, at_stmt);
931 else
932 gcc_unreachable (); /* Handled in caller. */
935 else if (TREE_CODE (rhs1) == SSA_NAME)
937 /* Match an assignment under the form:
938 "a = ... + c". */
939 res = follow_ssa_edge_expr (at_stmt, rhs1, evolution_of_loop, limit);
940 if (res == t_true)
941 *evolution_of_loop = add_to_evolution
942 (chrec_convert (type, *evolution_of_loop, at_stmt),
943 code, rhs0, at_stmt);
946 else
947 /* Otherwise, match an assignment under the form:
948 "a = ... + ...". */
949 /* And there is nothing to do. */
950 res = t_false;
951 break;
953 case MINUS_EXPR:
954 /* This case is under the form "opnd0 = rhs0 - rhs1". */
955 if (TREE_CODE (rhs0) == SSA_NAME)
956 gcc_unreachable (); /* Handled in caller. */
957 else
958 /* Otherwise, match an assignment under the form:
959 "a = ... - ...". */
960 /* And there is nothing to do. */
961 res = t_false;
962 break;
964 default:
965 res = t_false;
968 return res;
971 /* Checks whether the I-th argument of a PHI comes from a backedge. */
973 static bool
974 backedge_phi_arg_p (gphi *phi, int i)
976 const_edge e = gimple_phi_arg_edge (phi, i);
978 /* We would in fact like to test EDGE_DFS_BACK here, but we do not care
979 about updating it anywhere, and this should work as well most of the
980 time. */
981 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
982 return true;
984 return false;
987 /* Helper function for one branch of the condition-phi-node. Return
988 true if the strongly connected component has been found following
989 this path. */
991 t_bool
992 scev_dfs::follow_ssa_edge_in_condition_phi_branch (int i,
993 gphi *condition_phi,
994 tree *evolution_of_branch,
995 tree init_cond, int limit)
997 tree branch = PHI_ARG_DEF (condition_phi, i);
998 *evolution_of_branch = chrec_dont_know;
1000 /* Do not follow back edges (they must belong to an irreducible loop, which
1001 we really do not want to worry about). */
1002 if (backedge_phi_arg_p (condition_phi, i))
1003 return t_false;
1005 if (TREE_CODE (branch) == SSA_NAME)
1007 *evolution_of_branch = init_cond;
1008 return follow_ssa_edge_expr (condition_phi, branch,
1009 evolution_of_branch, limit);
1012 /* This case occurs when one of the condition branches sets
1013 the variable to a constant: i.e. a phi-node like
1014 "a_2 = PHI <a_7(5), 2(6)>;".
1016 FIXME: This case have to be refined correctly:
1017 in some cases it is possible to say something better than
1018 chrec_dont_know, for example using a wrap-around notation. */
1019 return t_false;
1022 /* This function merges the branches of a condition-phi-node in a
1023 loop. */
1025 t_bool
1026 scev_dfs::follow_ssa_edge_in_condition_phi (gphi *condition_phi,
1027 tree *evolution_of_loop, int limit)
1029 int i, n;
1030 tree init = *evolution_of_loop;
1031 tree evolution_of_branch;
1032 t_bool res = follow_ssa_edge_in_condition_phi_branch (0, condition_phi,
1033 &evolution_of_branch,
1034 init, limit);
1035 if (res == t_false || res == t_dont_know)
1036 return res;
1038 *evolution_of_loop = evolution_of_branch;
1040 n = gimple_phi_num_args (condition_phi);
1041 for (i = 1; i < n; i++)
1043 /* Quickly give up when the evolution of one of the branches is
1044 not known. */
1045 if (*evolution_of_loop == chrec_dont_know)
1046 return t_true;
1048 /* Increase the limit by the PHI argument number to avoid exponential
1049 time and memory complexity. */
1050 res = follow_ssa_edge_in_condition_phi_branch (i, condition_phi,
1051 &evolution_of_branch,
1052 init, limit + i);
1053 if (res == t_false || res == t_dont_know)
1054 return res;
1056 *evolution_of_loop = chrec_merge (*evolution_of_loop,
1057 evolution_of_branch);
1060 return t_true;
1063 /* Follow an SSA edge in an inner loop. It computes the overall
1064 effect of the loop, and following the symbolic initial conditions,
1065 it follows the edges in the parent loop. The inner loop is
1066 considered as a single statement. */
1068 t_bool
1069 scev_dfs::follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
1070 tree *evolution_of_loop, int limit)
1072 class loop *loop = loop_containing_stmt (loop_phi_node);
1073 tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node));
1075 /* Sometimes, the inner loop is too difficult to analyze, and the
1076 result of the analysis is a symbolic parameter. */
1077 if (ev == PHI_RESULT (loop_phi_node))
1079 t_bool res = t_false;
1080 int i, n = gimple_phi_num_args (loop_phi_node);
1082 for (i = 0; i < n; i++)
1084 tree arg = PHI_ARG_DEF (loop_phi_node, i);
1085 basic_block bb;
1087 /* Follow the edges that exit the inner loop. */
1088 bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1089 if (!flow_bb_inside_loop_p (loop, bb))
1090 res = follow_ssa_edge_expr (loop_phi_node,
1091 arg, evolution_of_loop, limit);
1092 if (res == t_true)
1093 break;
1096 /* If the path crosses this loop-phi, give up. */
1097 if (res == t_true)
1098 *evolution_of_loop = chrec_dont_know;
1100 return res;
1103 /* Otherwise, compute the overall effect of the inner loop. */
1104 ev = compute_overall_effect_of_inner_loop (loop, ev);
1105 return follow_ssa_edge_expr (loop_phi_node, ev, evolution_of_loop, limit);
1108 /* Follow the ssa edge into the expression EXPR.
1109 Return true if the strongly connected component has been found. */
1111 t_bool
1112 scev_dfs::follow_ssa_edge_expr (gimple *at_stmt, tree expr,
1113 tree *evolution_of_loop, int limit)
1115 gphi *halting_phi = loop_phi_node;
1116 enum tree_code code;
1117 tree type, rhs0, rhs1 = NULL_TREE;
1119 /* The EXPR is one of the following cases:
1120 - an SSA_NAME,
1121 - an INTEGER_CST,
1122 - a PLUS_EXPR,
1123 - a POINTER_PLUS_EXPR,
1124 - a MINUS_EXPR,
1125 - other cases are not yet handled. */
1127 /* For SSA_NAME look at the definition statement, handling
1128 PHI nodes and otherwise expand appropriately for the expression
1129 handling below. */
1130 if (TREE_CODE (expr) == SSA_NAME)
1132 gimple *def = SSA_NAME_DEF_STMT (expr);
1134 if (gimple_nop_p (def))
1135 return t_false;
1137 /* Give up if the path is longer than the MAX that we allow. */
1138 if (limit > param_scev_max_expr_complexity)
1140 *evolution_of_loop = chrec_dont_know;
1141 return t_dont_know;
1144 if (gphi *phi = dyn_cast <gphi *>(def))
1146 if (!loop_phi_node_p (phi))
1147 /* DEF is a condition-phi-node. Follow the branches, and
1148 record their evolutions. Finally, merge the collected
1149 information and set the approximation to the main
1150 variable. */
1151 return follow_ssa_edge_in_condition_phi (phi, evolution_of_loop,
1152 limit);
1154 /* When the analyzed phi is the halting_phi, the
1155 depth-first search is over: we have found a path from
1156 the halting_phi to itself in the loop. */
1157 if (phi == halting_phi)
1159 *evolution_of_loop = expr;
1160 return t_true;
1163 /* Otherwise, the evolution of the HALTING_PHI depends
1164 on the evolution of another loop-phi-node, i.e. the
1165 evolution function is a higher degree polynomial. */
1166 class loop *def_loop = loop_containing_stmt (def);
1167 if (def_loop == loop)
1168 return t_false;
1170 /* Inner loop. */
1171 if (flow_loop_nested_p (loop, def_loop))
1172 return follow_ssa_edge_inner_loop_phi (phi, evolution_of_loop,
1173 limit + 1);
1175 /* Outer loop. */
1176 return t_false;
1179 /* At this level of abstraction, the program is just a set
1180 of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no
1181 other def to be handled. */
1182 if (!is_gimple_assign (def))
1183 return t_false;
1185 code = gimple_assign_rhs_code (def);
1186 switch (get_gimple_rhs_class (code))
1188 case GIMPLE_BINARY_RHS:
1189 rhs0 = gimple_assign_rhs1 (def);
1190 rhs1 = gimple_assign_rhs2 (def);
1191 break;
1192 case GIMPLE_UNARY_RHS:
1193 case GIMPLE_SINGLE_RHS:
1194 rhs0 = gimple_assign_rhs1 (def);
1195 break;
1196 default:
1197 return t_false;
1199 type = TREE_TYPE (gimple_assign_lhs (def));
1200 at_stmt = def;
1202 else
1204 code = TREE_CODE (expr);
1205 type = TREE_TYPE (expr);
1206 /* Via follow_ssa_edge_inner_loop_phi we arrive here with the
1207 GENERIC scalar evolution of the inner loop. */
1208 switch (code)
1210 CASE_CONVERT:
1211 rhs0 = TREE_OPERAND (expr, 0);
1212 break;
1213 case POINTER_PLUS_EXPR:
1214 case PLUS_EXPR:
1215 case MINUS_EXPR:
1216 rhs0 = TREE_OPERAND (expr, 0);
1217 rhs1 = TREE_OPERAND (expr, 1);
1218 STRIP_USELESS_TYPE_CONVERSION (rhs0);
1219 STRIP_USELESS_TYPE_CONVERSION (rhs1);
1220 break;
1221 default:
1222 rhs0 = expr;
1226 switch (code)
1228 CASE_CONVERT:
1230 /* This assignment is under the form "a_1 = (cast) rhs. We cannot
1231 validate any precision altering conversion during the SCC
1232 analysis, so don't even try. */
1233 if (!tree_nop_conversion_p (type, TREE_TYPE (rhs0)))
1234 return t_false;
1235 t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1236 evolution_of_loop, limit);
1237 if (res == t_true)
1238 *evolution_of_loop = chrec_convert (type, *evolution_of_loop,
1239 at_stmt);
1240 return res;
1243 case INTEGER_CST:
1244 /* This assignment is under the form "a_1 = 7". */
1245 return t_false;
1247 case ADDR_EXPR:
1249 /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
1250 if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
1251 return t_false;
1252 tree mem = TREE_OPERAND (rhs0, 0);
1253 rhs0 = TREE_OPERAND (mem, 0);
1254 rhs1 = TREE_OPERAND (mem, 1);
1255 code = POINTER_PLUS_EXPR;
1257 /* Fallthru. */
1258 case POINTER_PLUS_EXPR:
1259 case PLUS_EXPR:
1260 case MINUS_EXPR:
1261 /* This case is under the form "rhs0 +- rhs1". */
1262 if (TREE_CODE (rhs0) == SSA_NAME
1263 && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR))
1265 /* Match an assignment under the form:
1266 "a = b +- ...". */
1267 t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1268 evolution_of_loop, limit);
1269 if (res == t_true)
1270 *evolution_of_loop = add_to_evolution
1271 (chrec_convert (type, *evolution_of_loop, at_stmt),
1272 code, rhs1, at_stmt);
1273 return res;
1275 /* Else search for the SCC in both rhs0 and rhs1. */
1276 return follow_ssa_edge_binary (at_stmt, type, rhs0, code, rhs1,
1277 evolution_of_loop, limit);
1279 default:
1280 return t_false;
1285 /* This section selects the loops that will be good candidates for the
1286 scalar evolution analysis. For the moment, greedily select all the
1287 loop nests we could analyze. */
1289 /* For a loop with a single exit edge, return the COND_EXPR that
1290 guards the exit edge. If the expression is too difficult to
1291 analyze, then give up. */
1293 gcond *
1294 get_loop_exit_condition (const class loop *loop)
1296 gcond *res = NULL;
1297 edge exit_edge = single_exit (loop);
1299 if (dump_file && (dump_flags & TDF_SCEV))
1300 fprintf (dump_file, "(get_loop_exit_condition \n ");
1302 if (exit_edge)
1304 gimple *stmt;
1306 stmt = last_stmt (exit_edge->src);
1307 if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt))
1308 res = cond_stmt;
1311 if (dump_file && (dump_flags & TDF_SCEV))
1313 print_gimple_stmt (dump_file, res, 0);
1314 fprintf (dump_file, ")\n");
1317 return res;
1321 /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP.
1322 Handle below case and return the corresponding POLYNOMIAL_CHREC:
1324 # i_17 = PHI <i_13(5), 0(3)>
1325 # _20 = PHI <_5(5), start_4(D)(3)>
1327 i_13 = i_17 + 1;
1328 _5 = start_4(D) + i_13;
1330 Though variable _20 appears as a PEELED_CHREC in the form of
1331 (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP.
1333 See PR41488. */
1335 static tree
1336 simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond)
1338 aff_tree aff1, aff2;
1339 tree ev, left, right, type, step_val;
1340 hash_map<tree, name_expansion *> *peeled_chrec_map = NULL;
1342 ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg));
1343 if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC)
1344 return chrec_dont_know;
1346 left = CHREC_LEFT (ev);
1347 right = CHREC_RIGHT (ev);
1348 type = TREE_TYPE (left);
1349 step_val = chrec_fold_plus (type, init_cond, right);
1351 /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1352 if "left" equals to "init + right". */
1353 if (operand_equal_p (left, step_val, 0))
1355 if (dump_file && (dump_flags & TDF_SCEV))
1356 fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1358 return build_polynomial_chrec (loop->num, init_cond, right);
1361 /* The affine code only deals with pointer and integer types. */
1362 if (!POINTER_TYPE_P (type)
1363 && !INTEGRAL_TYPE_P (type))
1364 return chrec_dont_know;
1366 /* Try harder to check if they are equal. */
1367 tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map);
1368 tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map);
1369 free_affine_expand_cache (&peeled_chrec_map);
1370 aff_combination_scale (&aff2, -1);
1371 aff_combination_add (&aff1, &aff2);
1373 /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1374 if "left" equals to "init + right". */
1375 if (aff_combination_zero_p (&aff1))
1377 if (dump_file && (dump_flags & TDF_SCEV))
1378 fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1380 return build_polynomial_chrec (loop->num, init_cond, right);
1382 return chrec_dont_know;
1385 /* Given a LOOP_PHI_NODE, this function determines the evolution
1386 function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop. */
1388 static tree
1389 analyze_evolution_in_loop (gphi *loop_phi_node,
1390 tree init_cond)
1392 int i, n = gimple_phi_num_args (loop_phi_node);
1393 tree evolution_function = chrec_not_analyzed_yet;
1394 class loop *loop = loop_containing_stmt (loop_phi_node);
1395 basic_block bb;
1396 static bool simplify_peeled_chrec_p = true;
1398 if (dump_file && (dump_flags & TDF_SCEV))
1400 fprintf (dump_file, "(analyze_evolution_in_loop \n");
1401 fprintf (dump_file, " (loop_phi_node = ");
1402 print_gimple_stmt (dump_file, loop_phi_node, 0);
1403 fprintf (dump_file, ")\n");
1406 for (i = 0; i < n; i++)
1408 tree arg = PHI_ARG_DEF (loop_phi_node, i);
1409 tree ev_fn = chrec_dont_know;
1410 t_bool res;
1412 /* Select the edges that enter the loop body. */
1413 bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1414 if (!flow_bb_inside_loop_p (loop, bb))
1415 continue;
1417 if (TREE_CODE (arg) == SSA_NAME)
1419 bool val = false;
1421 /* Pass in the initial condition to the follow edge function. */
1422 scev_dfs dfs (loop, loop_phi_node, init_cond);
1423 res = dfs.get_ev (&ev_fn, arg);
1425 /* If ev_fn has no evolution in the inner loop, and the
1426 init_cond is not equal to ev_fn, then we have an
1427 ambiguity between two possible values, as we cannot know
1428 the number of iterations at this point. */
1429 if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC
1430 && no_evolution_in_loop_p (ev_fn, loop->num, &val) && val
1431 && !operand_equal_p (init_cond, ev_fn, 0))
1432 ev_fn = chrec_dont_know;
1434 else
1435 res = t_false;
1437 /* When it is impossible to go back on the same
1438 loop_phi_node by following the ssa edges, the
1439 evolution is represented by a peeled chrec, i.e. the
1440 first iteration, EV_FN has the value INIT_COND, then
1441 all the other iterations it has the value of ARG.
1442 For the moment, PEELED_CHREC nodes are not built. */
1443 if (res != t_true)
1445 ev_fn = chrec_dont_know;
1446 /* Try to recognize POLYNOMIAL_CHREC which appears in
1447 the form of PEELED_CHREC, but guard the process with
1448 a bool variable to keep the analyzer from infinite
1449 recurrence for real PEELED_RECs. */
1450 if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME)
1452 simplify_peeled_chrec_p = false;
1453 ev_fn = simplify_peeled_chrec (loop, arg, init_cond);
1454 simplify_peeled_chrec_p = true;
1458 /* When there are multiple back edges of the loop (which in fact never
1459 happens currently, but nevertheless), merge their evolutions. */
1460 evolution_function = chrec_merge (evolution_function, ev_fn);
1462 if (evolution_function == chrec_dont_know)
1463 break;
1466 if (dump_file && (dump_flags & TDF_SCEV))
1468 fprintf (dump_file, " (evolution_function = ");
1469 print_generic_expr (dump_file, evolution_function);
1470 fprintf (dump_file, "))\n");
1473 return evolution_function;
1476 /* Looks to see if VAR is a copy of a constant (via straightforward assignments
1477 or degenerate phi's). If so, returns the constant; else, returns VAR. */
1479 static tree
1480 follow_copies_to_constant (tree var)
1482 tree res = var;
1483 while (TREE_CODE (res) == SSA_NAME
1484 /* We face not updated SSA form in multiple places and this walk
1485 may end up in sibling loops so we have to guard it. */
1486 && !name_registered_for_update_p (res))
1488 gimple *def = SSA_NAME_DEF_STMT (res);
1489 if (gphi *phi = dyn_cast <gphi *> (def))
1491 if (tree rhs = degenerate_phi_result (phi))
1492 res = rhs;
1493 else
1494 break;
1496 else if (gimple_assign_single_p (def))
1497 /* Will exit loop if not an SSA_NAME. */
1498 res = gimple_assign_rhs1 (def);
1499 else
1500 break;
1502 if (CONSTANT_CLASS_P (res))
1503 return res;
1504 return var;
1507 /* Given a loop-phi-node, return the initial conditions of the
1508 variable on entry of the loop. When the CCP has propagated
1509 constants into the loop-phi-node, the initial condition is
1510 instantiated, otherwise the initial condition is kept symbolic.
1511 This analyzer does not analyze the evolution outside the current
1512 loop, and leaves this task to the on-demand tree reconstructor. */
1514 static tree
1515 analyze_initial_condition (gphi *loop_phi_node)
1517 int i, n;
1518 tree init_cond = chrec_not_analyzed_yet;
1519 class loop *loop = loop_containing_stmt (loop_phi_node);
1521 if (dump_file && (dump_flags & TDF_SCEV))
1523 fprintf (dump_file, "(analyze_initial_condition \n");
1524 fprintf (dump_file, " (loop_phi_node = \n");
1525 print_gimple_stmt (dump_file, loop_phi_node, 0);
1526 fprintf (dump_file, ")\n");
1529 n = gimple_phi_num_args (loop_phi_node);
1530 for (i = 0; i < n; i++)
1532 tree branch = PHI_ARG_DEF (loop_phi_node, i);
1533 basic_block bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1535 /* When the branch is oriented to the loop's body, it does
1536 not contribute to the initial condition. */
1537 if (flow_bb_inside_loop_p (loop, bb))
1538 continue;
1540 if (init_cond == chrec_not_analyzed_yet)
1542 init_cond = branch;
1543 continue;
1546 if (TREE_CODE (branch) == SSA_NAME)
1548 init_cond = chrec_dont_know;
1549 break;
1552 init_cond = chrec_merge (init_cond, branch);
1555 /* Ooops -- a loop without an entry??? */
1556 if (init_cond == chrec_not_analyzed_yet)
1557 init_cond = chrec_dont_know;
1559 /* We may not have fully constant propagated IL. Handle degenerate PHIs here
1560 to not miss important early loop unrollings. */
1561 init_cond = follow_copies_to_constant (init_cond);
1563 if (dump_file && (dump_flags & TDF_SCEV))
1565 fprintf (dump_file, " (init_cond = ");
1566 print_generic_expr (dump_file, init_cond);
1567 fprintf (dump_file, "))\n");
1570 return init_cond;
1573 /* Analyze the scalar evolution for LOOP_PHI_NODE. */
1575 static tree
1576 interpret_loop_phi (class loop *loop, gphi *loop_phi_node)
1578 class loop *phi_loop = loop_containing_stmt (loop_phi_node);
1579 tree init_cond;
1581 gcc_assert (phi_loop == loop);
1583 /* Otherwise really interpret the loop phi. */
1584 init_cond = analyze_initial_condition (loop_phi_node);
1585 return analyze_evolution_in_loop (loop_phi_node, init_cond);
1588 /* This function merges the branches of a condition-phi-node,
1589 contained in the outermost loop, and whose arguments are already
1590 analyzed. */
1592 static tree
1593 interpret_condition_phi (class loop *loop, gphi *condition_phi)
1595 int i, n = gimple_phi_num_args (condition_phi);
1596 tree res = chrec_not_analyzed_yet;
1598 for (i = 0; i < n; i++)
1600 tree branch_chrec;
1602 if (backedge_phi_arg_p (condition_phi, i))
1604 res = chrec_dont_know;
1605 break;
1608 branch_chrec = analyze_scalar_evolution
1609 (loop, PHI_ARG_DEF (condition_phi, i));
1611 res = chrec_merge (res, branch_chrec);
1612 if (res == chrec_dont_know)
1613 break;
1616 return res;
1619 /* Interpret the operation RHS1 OP RHS2. If we didn't
1620 analyze this node before, follow the definitions until ending
1621 either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node. On the
1622 return path, this function propagates evolutions (ala constant copy
1623 propagation). OPND1 is not a GIMPLE expression because we could
1624 analyze the effect of an inner loop: see interpret_loop_phi. */
1626 static tree
1627 interpret_rhs_expr (class loop *loop, gimple *at_stmt,
1628 tree type, tree rhs1, enum tree_code code, tree rhs2)
1630 tree res, chrec1, chrec2, ctype;
1631 gimple *def;
1633 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1635 if (is_gimple_min_invariant (rhs1))
1636 return chrec_convert (type, rhs1, at_stmt);
1638 if (code == SSA_NAME)
1639 return chrec_convert (type, analyze_scalar_evolution (loop, rhs1),
1640 at_stmt);
1643 switch (code)
1645 case ADDR_EXPR:
1646 if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
1647 || handled_component_p (TREE_OPERAND (rhs1, 0)))
1649 machine_mode mode;
1650 poly_int64 bitsize, bitpos;
1651 int unsignedp, reversep;
1652 int volatilep = 0;
1653 tree base, offset;
1654 tree chrec3;
1655 tree unitpos;
1657 base = get_inner_reference (TREE_OPERAND (rhs1, 0),
1658 &bitsize, &bitpos, &offset, &mode,
1659 &unsignedp, &reversep, &volatilep);
1661 if (TREE_CODE (base) == MEM_REF)
1663 rhs2 = TREE_OPERAND (base, 1);
1664 rhs1 = TREE_OPERAND (base, 0);
1666 chrec1 = analyze_scalar_evolution (loop, rhs1);
1667 chrec2 = analyze_scalar_evolution (loop, rhs2);
1668 chrec1 = chrec_convert (type, chrec1, at_stmt);
1669 chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1670 chrec1 = instantiate_parameters (loop, chrec1);
1671 chrec2 = instantiate_parameters (loop, chrec2);
1672 res = chrec_fold_plus (type, chrec1, chrec2);
1674 else
1676 chrec1 = analyze_scalar_evolution_for_address_of (loop, base);
1677 chrec1 = chrec_convert (type, chrec1, at_stmt);
1678 res = chrec1;
1681 if (offset != NULL_TREE)
1683 chrec2 = analyze_scalar_evolution (loop, offset);
1684 chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
1685 chrec2 = instantiate_parameters (loop, chrec2);
1686 res = chrec_fold_plus (type, res, chrec2);
1689 if (maybe_ne (bitpos, 0))
1691 unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT));
1692 chrec3 = analyze_scalar_evolution (loop, unitpos);
1693 chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt);
1694 chrec3 = instantiate_parameters (loop, chrec3);
1695 res = chrec_fold_plus (type, res, chrec3);
1698 else
1699 res = chrec_dont_know;
1700 break;
1702 case POINTER_PLUS_EXPR:
1703 chrec1 = analyze_scalar_evolution (loop, rhs1);
1704 chrec2 = analyze_scalar_evolution (loop, rhs2);
1705 chrec1 = chrec_convert (type, chrec1, at_stmt);
1706 chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1707 chrec1 = instantiate_parameters (loop, chrec1);
1708 chrec2 = instantiate_parameters (loop, chrec2);
1709 res = chrec_fold_plus (type, chrec1, chrec2);
1710 break;
1712 case PLUS_EXPR:
1713 chrec1 = analyze_scalar_evolution (loop, rhs1);
1714 chrec2 = analyze_scalar_evolution (loop, rhs2);
1715 ctype = type;
1716 /* When the stmt is conditionally executed re-write the CHREC
1717 into a form that has well-defined behavior on overflow. */
1718 if (at_stmt
1719 && INTEGRAL_TYPE_P (type)
1720 && ! TYPE_OVERFLOW_WRAPS (type)
1721 && ! dominated_by_p (CDI_DOMINATORS, loop->latch,
1722 gimple_bb (at_stmt)))
1723 ctype = unsigned_type_for (type);
1724 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1725 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1726 chrec1 = instantiate_parameters (loop, chrec1);
1727 chrec2 = instantiate_parameters (loop, chrec2);
1728 res = chrec_fold_plus (ctype, chrec1, chrec2);
1729 if (type != ctype)
1730 res = chrec_convert (type, res, at_stmt);
1731 break;
1733 case MINUS_EXPR:
1734 chrec1 = analyze_scalar_evolution (loop, rhs1);
1735 chrec2 = analyze_scalar_evolution (loop, rhs2);
1736 ctype = type;
1737 /* When the stmt is conditionally executed re-write the CHREC
1738 into a form that has well-defined behavior on overflow. */
1739 if (at_stmt
1740 && INTEGRAL_TYPE_P (type)
1741 && ! TYPE_OVERFLOW_WRAPS (type)
1742 && ! dominated_by_p (CDI_DOMINATORS,
1743 loop->latch, gimple_bb (at_stmt)))
1744 ctype = unsigned_type_for (type);
1745 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1746 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1747 chrec1 = instantiate_parameters (loop, chrec1);
1748 chrec2 = instantiate_parameters (loop, chrec2);
1749 res = chrec_fold_minus (ctype, chrec1, chrec2);
1750 if (type != ctype)
1751 res = chrec_convert (type, res, at_stmt);
1752 break;
1754 case NEGATE_EXPR:
1755 chrec1 = analyze_scalar_evolution (loop, rhs1);
1756 ctype = type;
1757 /* When the stmt is conditionally executed re-write the CHREC
1758 into a form that has well-defined behavior on overflow. */
1759 if (at_stmt
1760 && INTEGRAL_TYPE_P (type)
1761 && ! TYPE_OVERFLOW_WRAPS (type)
1762 && ! dominated_by_p (CDI_DOMINATORS,
1763 loop->latch, gimple_bb (at_stmt)))
1764 ctype = unsigned_type_for (type);
1765 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1766 /* TYPE may be integer, real or complex, so use fold_convert. */
1767 chrec1 = instantiate_parameters (loop, chrec1);
1768 res = chrec_fold_multiply (ctype, chrec1,
1769 fold_convert (ctype, integer_minus_one_node));
1770 if (type != ctype)
1771 res = chrec_convert (type, res, at_stmt);
1772 break;
1774 case BIT_NOT_EXPR:
1775 /* Handle ~X as -1 - X. */
1776 chrec1 = analyze_scalar_evolution (loop, rhs1);
1777 chrec1 = chrec_convert (type, chrec1, at_stmt);
1778 chrec1 = instantiate_parameters (loop, chrec1);
1779 res = chrec_fold_minus (type,
1780 fold_convert (type, integer_minus_one_node),
1781 chrec1);
1782 break;
1784 case MULT_EXPR:
1785 chrec1 = analyze_scalar_evolution (loop, rhs1);
1786 chrec2 = analyze_scalar_evolution (loop, rhs2);
1787 ctype = type;
1788 /* When the stmt is conditionally executed re-write the CHREC
1789 into a form that has well-defined behavior on overflow. */
1790 if (at_stmt
1791 && INTEGRAL_TYPE_P (type)
1792 && ! TYPE_OVERFLOW_WRAPS (type)
1793 && ! dominated_by_p (CDI_DOMINATORS,
1794 loop->latch, gimple_bb (at_stmt)))
1795 ctype = unsigned_type_for (type);
1796 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1797 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1798 chrec1 = instantiate_parameters (loop, chrec1);
1799 chrec2 = instantiate_parameters (loop, chrec2);
1800 res = chrec_fold_multiply (ctype, chrec1, chrec2);
1801 if (type != ctype)
1802 res = chrec_convert (type, res, at_stmt);
1803 break;
1805 case LSHIFT_EXPR:
1807 /* Handle A<<B as A * (1<<B). */
1808 tree uns = unsigned_type_for (type);
1809 chrec1 = analyze_scalar_evolution (loop, rhs1);
1810 chrec2 = analyze_scalar_evolution (loop, rhs2);
1811 chrec1 = chrec_convert (uns, chrec1, at_stmt);
1812 chrec1 = instantiate_parameters (loop, chrec1);
1813 chrec2 = instantiate_parameters (loop, chrec2);
1815 tree one = build_int_cst (uns, 1);
1816 chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2);
1817 res = chrec_fold_multiply (uns, chrec1, chrec2);
1818 res = chrec_convert (type, res, at_stmt);
1820 break;
1822 CASE_CONVERT:
1823 /* In case we have a truncation of a widened operation that in
1824 the truncated type has undefined overflow behavior analyze
1825 the operation done in an unsigned type of the same precision
1826 as the final truncation. We cannot derive a scalar evolution
1827 for the widened operation but for the truncated result. */
1828 if (TREE_CODE (type) == INTEGER_TYPE
1829 && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE
1830 && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1))
1831 && TYPE_OVERFLOW_UNDEFINED (type)
1832 && TREE_CODE (rhs1) == SSA_NAME
1833 && (def = SSA_NAME_DEF_STMT (rhs1))
1834 && is_gimple_assign (def)
1835 && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary
1836 && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST)
1838 tree utype = unsigned_type_for (type);
1839 chrec1 = interpret_rhs_expr (loop, at_stmt, utype,
1840 gimple_assign_rhs1 (def),
1841 gimple_assign_rhs_code (def),
1842 gimple_assign_rhs2 (def));
1844 else
1845 chrec1 = analyze_scalar_evolution (loop, rhs1);
1846 res = chrec_convert (type, chrec1, at_stmt, true, rhs1);
1847 break;
1849 case BIT_AND_EXPR:
1850 /* Given int variable A, handle A&0xffff as (int)(unsigned short)A.
1851 If A is SCEV and its value is in the range of representable set
1852 of type unsigned short, the result expression is a (no-overflow)
1853 SCEV. */
1854 res = chrec_dont_know;
1855 if (tree_fits_uhwi_p (rhs2))
1857 int precision;
1858 unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2);
1860 val ++;
1861 /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or
1862 it's not the maximum value of a smaller type than rhs1. */
1863 if (val != 0
1864 && (precision = exact_log2 (val)) > 0
1865 && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1)))
1867 tree utype = build_nonstandard_integer_type (precision, 1);
1869 if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1)))
1871 chrec1 = analyze_scalar_evolution (loop, rhs1);
1872 chrec1 = chrec_convert (utype, chrec1, at_stmt);
1873 res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt);
1877 break;
1879 default:
1880 res = chrec_dont_know;
1881 break;
1884 return res;
1887 /* Interpret the expression EXPR. */
1889 static tree
1890 interpret_expr (class loop *loop, gimple *at_stmt, tree expr)
1892 enum tree_code code;
1893 tree type = TREE_TYPE (expr), op0, op1;
1895 if (automatically_generated_chrec_p (expr))
1896 return expr;
1898 if (TREE_CODE (expr) == POLYNOMIAL_CHREC
1899 || TREE_CODE (expr) == CALL_EXPR
1900 || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS)
1901 return chrec_dont_know;
1903 extract_ops_from_tree (expr, &code, &op0, &op1);
1905 return interpret_rhs_expr (loop, at_stmt, type,
1906 op0, code, op1);
1909 /* Interpret the rhs of the assignment STMT. */
1911 static tree
1912 interpret_gimple_assign (class loop *loop, gimple *stmt)
1914 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1915 enum tree_code code = gimple_assign_rhs_code (stmt);
1917 return interpret_rhs_expr (loop, stmt, type,
1918 gimple_assign_rhs1 (stmt), code,
1919 gimple_assign_rhs2 (stmt));
1924 /* This section contains all the entry points:
1925 - number_of_iterations_in_loop,
1926 - analyze_scalar_evolution,
1927 - instantiate_parameters.
1930 /* Helper recursive function. */
1932 static tree
1933 analyze_scalar_evolution_1 (class loop *loop, tree var)
1935 gimple *def;
1936 basic_block bb;
1937 class loop *def_loop;
1938 tree res;
1940 if (TREE_CODE (var) != SSA_NAME)
1941 return interpret_expr (loop, NULL, var);
1943 def = SSA_NAME_DEF_STMT (var);
1944 bb = gimple_bb (def);
1945 def_loop = bb->loop_father;
1947 if (!flow_bb_inside_loop_p (loop, bb))
1949 /* Keep symbolic form, but look through obvious copies for constants. */
1950 res = follow_copies_to_constant (var);
1951 goto set_and_end;
1954 if (loop != def_loop)
1956 res = analyze_scalar_evolution_1 (def_loop, var);
1957 class loop *loop_to_skip = superloop_at_depth (def_loop,
1958 loop_depth (loop) + 1);
1959 res = compute_overall_effect_of_inner_loop (loop_to_skip, res);
1960 if (chrec_contains_symbols_defined_in_loop (res, loop->num))
1961 res = analyze_scalar_evolution_1 (loop, res);
1962 goto set_and_end;
1965 switch (gimple_code (def))
1967 case GIMPLE_ASSIGN:
1968 res = interpret_gimple_assign (loop, def);
1969 break;
1971 case GIMPLE_PHI:
1972 if (loop_phi_node_p (def))
1973 res = interpret_loop_phi (loop, as_a <gphi *> (def));
1974 else
1975 res = interpret_condition_phi (loop, as_a <gphi *> (def));
1976 break;
1978 default:
1979 res = chrec_dont_know;
1980 break;
1983 set_and_end:
1985 /* Keep the symbolic form. */
1986 if (res == chrec_dont_know)
1987 res = var;
1989 if (loop == def_loop)
1990 set_scalar_evolution (block_before_loop (loop), var, res);
1992 return res;
1995 /* Analyzes and returns the scalar evolution of the ssa_name VAR in
1996 LOOP. LOOP is the loop in which the variable is used.
1998 Example of use: having a pointer VAR to a SSA_NAME node, STMT a
1999 pointer to the statement that uses this variable, in order to
2000 determine the evolution function of the variable, use the following
2001 calls:
2003 loop_p loop = loop_containing_stmt (stmt);
2004 tree chrec_with_symbols = analyze_scalar_evolution (loop, var);
2005 tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols);
2008 tree
2009 analyze_scalar_evolution (class loop *loop, tree var)
2011 tree res;
2013 /* ??? Fix callers. */
2014 if (! loop)
2015 return var;
2017 if (dump_file && (dump_flags & TDF_SCEV))
2019 fprintf (dump_file, "(analyze_scalar_evolution \n");
2020 fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
2021 fprintf (dump_file, " (scalar = ");
2022 print_generic_expr (dump_file, var);
2023 fprintf (dump_file, ")\n");
2026 res = get_scalar_evolution (block_before_loop (loop), var);
2027 if (res == chrec_not_analyzed_yet)
2029 /* We'll recurse into instantiate_scev, avoid tearing down the
2030 instantiate cache repeatedly and keep it live from here. */
2031 bool destr = false;
2032 if (!global_cache)
2034 global_cache = new instantiate_cache_type;
2035 destr = true;
2037 res = analyze_scalar_evolution_1 (loop, var);
2038 if (destr)
2040 delete global_cache;
2041 global_cache = NULL;
2045 if (dump_file && (dump_flags & TDF_SCEV))
2046 fprintf (dump_file, ")\n");
2048 return res;
2051 /* Analyzes and returns the scalar evolution of VAR address in LOOP. */
2053 static tree
2054 analyze_scalar_evolution_for_address_of (class loop *loop, tree var)
2056 return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
2059 /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to
2060 WRTO_LOOP (which should be a superloop of USE_LOOP)
2062 FOLDED_CASTS is set to true if resolve_mixers used
2063 chrec_convert_aggressive (TODO -- not really, we are way too conservative
2064 at the moment in order to keep things simple).
2066 To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following
2067 example:
2069 for (i = 0; i < 100; i++) -- loop 1
2071 for (j = 0; j < 100; j++) -- loop 2
2073 k1 = i;
2074 k2 = j;
2076 use2 (k1, k2);
2078 for (t = 0; t < 100; t++) -- loop 3
2079 use3 (k1, k2);
2082 use1 (k1, k2);
2085 Both k1 and k2 are invariants in loop3, thus
2086 analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1
2087 analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2
2089 As they are invariant, it does not matter whether we consider their
2090 usage in loop 3 or loop 2, hence
2091 analyze_scalar_evolution_in_loop (loop2, loop3, k1) =
2092 analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i
2093 analyze_scalar_evolution_in_loop (loop2, loop3, k2) =
2094 analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2
2096 Similarly for their evolutions with respect to loop 1. The values of K2
2097 in the use in loop 2 vary independently on loop 1, thus we cannot express
2098 the evolution with respect to loop 1:
2099 analyze_scalar_evolution_in_loop (loop1, loop3, k1) =
2100 analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1
2101 analyze_scalar_evolution_in_loop (loop1, loop3, k2) =
2102 analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know
2104 The value of k2 in the use in loop 1 is known, though:
2105 analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1
2106 analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100
2109 static tree
2110 analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop,
2111 tree version, bool *folded_casts)
2113 bool val = false;
2114 tree ev = version, tmp;
2116 /* We cannot just do
2118 tmp = analyze_scalar_evolution (use_loop, version);
2119 ev = resolve_mixers (wrto_loop, tmp, folded_casts);
2121 as resolve_mixers would query the scalar evolution with respect to
2122 wrto_loop. For example, in the situation described in the function
2123 comment, suppose that wrto_loop = loop1, use_loop = loop3 and
2124 version = k2. Then
2126 analyze_scalar_evolution (use_loop, version) = k2
2128 and resolve_mixers (loop1, k2, folded_casts) finds that the value of
2129 k2 in loop 1 is 100, which is a wrong result, since we are interested
2130 in the value in loop 3.
2132 Instead, we need to proceed from use_loop to wrto_loop loop by loop,
2133 each time checking that there is no evolution in the inner loop. */
2135 if (folded_casts)
2136 *folded_casts = false;
2137 while (1)
2139 tmp = analyze_scalar_evolution (use_loop, ev);
2140 ev = resolve_mixers (use_loop, tmp, folded_casts);
2142 if (use_loop == wrto_loop)
2143 return ev;
2145 /* If the value of the use changes in the inner loop, we cannot express
2146 its value in the outer loop (we might try to return interval chrec,
2147 but we do not have a user for it anyway) */
2148 if (!no_evolution_in_loop_p (ev, use_loop->num, &val)
2149 || !val)
2150 return chrec_dont_know;
2152 use_loop = loop_outer (use_loop);
2157 /* Computes a hash function for database element ELT. */
2159 static inline hashval_t
2160 hash_idx_scev_info (const void *elt_)
2162 unsigned idx = ((size_t) elt_) - 2;
2163 return scev_info_hasher::hash (&global_cache->entries[idx]);
2166 /* Compares database elements E1 and E2. */
2168 static inline int
2169 eq_idx_scev_info (const void *e1, const void *e2)
2171 unsigned idx1 = ((size_t) e1) - 2;
2172 return scev_info_hasher::equal (&global_cache->entries[idx1],
2173 (const scev_info_str *) e2);
2176 /* Returns from CACHE the slot number of the cached chrec for NAME. */
2178 static unsigned
2179 get_instantiated_value_entry (instantiate_cache_type &cache,
2180 tree name, edge instantiate_below)
2182 if (!cache.map)
2184 cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL);
2185 cache.entries.create (10);
2188 scev_info_str e;
2189 e.name_version = SSA_NAME_VERSION (name);
2190 e.instantiated_below = instantiate_below->dest->index;
2191 void **slot = htab_find_slot_with_hash (cache.map, &e,
2192 scev_info_hasher::hash (&e), INSERT);
2193 if (!*slot)
2195 e.chrec = chrec_not_analyzed_yet;
2196 *slot = (void *)(size_t)(cache.entries.length () + 2);
2197 cache.entries.safe_push (e);
2200 return ((size_t)*slot) - 2;
2204 /* Return the closed_loop_phi node for VAR. If there is none, return
2205 NULL_TREE. */
2207 static tree
2208 loop_closed_phi_def (tree var)
2210 class loop *loop;
2211 edge exit;
2212 gphi *phi;
2213 gphi_iterator psi;
2215 if (var == NULL_TREE
2216 || TREE_CODE (var) != SSA_NAME)
2217 return NULL_TREE;
2219 loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var));
2220 exit = single_exit (loop);
2221 if (!exit)
2222 return NULL_TREE;
2224 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2226 phi = psi.phi ();
2227 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var)
2228 return PHI_RESULT (phi);
2231 return NULL_TREE;
2234 static tree instantiate_scev_r (edge, class loop *, class loop *,
2235 tree, bool *, int);
2237 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2238 and EVOLUTION_LOOP, that were left under a symbolic form.
2240 CHREC is an SSA_NAME to be instantiated.
2242 CACHE is the cache of already instantiated values.
2244 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2245 conversions that may wrap in signed/pointer type are folded, as long
2246 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2247 then we don't do such fold.
2249 SIZE_EXPR is used for computing the size of the expression to be
2250 instantiated, and to stop if it exceeds some limit. */
2252 static tree
2253 instantiate_scev_name (edge instantiate_below,
2254 class loop *evolution_loop, class loop *inner_loop,
2255 tree chrec,
2256 bool *fold_conversions,
2257 int size_expr)
2259 tree res;
2260 class loop *def_loop;
2261 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec));
2263 /* A parameter, nothing to do. */
2264 if (!def_bb
2265 || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
2266 return chrec;
2268 /* We cache the value of instantiated variable to avoid exponential
2269 time complexity due to reevaluations. We also store the convenient
2270 value in the cache in order to prevent infinite recursion -- we do
2271 not want to instantiate the SSA_NAME if it is in a mixer
2272 structure. This is used for avoiding the instantiation of
2273 recursively defined functions, such as:
2275 | a_2 -> {0, +, 1, +, a_2}_1 */
2277 unsigned si = get_instantiated_value_entry (*global_cache,
2278 chrec, instantiate_below);
2279 if (global_cache->get (si) != chrec_not_analyzed_yet)
2280 return global_cache->get (si);
2282 /* On recursion return chrec_dont_know. */
2283 global_cache->set (si, chrec_dont_know);
2285 def_loop = find_common_loop (evolution_loop, def_bb->loop_father);
2287 if (! dominated_by_p (CDI_DOMINATORS,
2288 def_loop->header, instantiate_below->dest))
2290 gimple *def = SSA_NAME_DEF_STMT (chrec);
2291 if (gassign *ass = dyn_cast <gassign *> (def))
2293 switch (gimple_assign_rhs_class (ass))
2295 case GIMPLE_UNARY_RHS:
2297 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2298 inner_loop, gimple_assign_rhs1 (ass),
2299 fold_conversions, size_expr);
2300 if (op0 == chrec_dont_know)
2301 return chrec_dont_know;
2302 res = fold_build1 (gimple_assign_rhs_code (ass),
2303 TREE_TYPE (chrec), op0);
2304 break;
2306 case GIMPLE_BINARY_RHS:
2308 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2309 inner_loop, gimple_assign_rhs1 (ass),
2310 fold_conversions, size_expr);
2311 if (op0 == chrec_dont_know)
2312 return chrec_dont_know;
2313 tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2314 inner_loop, gimple_assign_rhs2 (ass),
2315 fold_conversions, size_expr);
2316 if (op1 == chrec_dont_know)
2317 return chrec_dont_know;
2318 res = fold_build2 (gimple_assign_rhs_code (ass),
2319 TREE_TYPE (chrec), op0, op1);
2320 break;
2322 default:
2323 res = chrec_dont_know;
2326 else
2327 res = chrec_dont_know;
2328 global_cache->set (si, res);
2329 return res;
2332 /* If the analysis yields a parametric chrec, instantiate the
2333 result again. */
2334 res = analyze_scalar_evolution (def_loop, chrec);
2336 /* Don't instantiate default definitions. */
2337 if (TREE_CODE (res) == SSA_NAME
2338 && SSA_NAME_IS_DEFAULT_DEF (res))
2341 /* Don't instantiate loop-closed-ssa phi nodes. */
2342 else if (TREE_CODE (res) == SSA_NAME
2343 && loop_depth (loop_containing_stmt (SSA_NAME_DEF_STMT (res)))
2344 > loop_depth (def_loop))
2346 if (res == chrec)
2347 res = loop_closed_phi_def (chrec);
2348 else
2349 res = chrec;
2351 /* When there is no loop_closed_phi_def, it means that the
2352 variable is not used after the loop: try to still compute the
2353 value of the variable when exiting the loop. */
2354 if (res == NULL_TREE)
2356 loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec));
2357 res = analyze_scalar_evolution (loop, chrec);
2358 res = compute_overall_effect_of_inner_loop (loop, res);
2359 res = instantiate_scev_r (instantiate_below, evolution_loop,
2360 inner_loop, res,
2361 fold_conversions, size_expr);
2363 else if (dominated_by_p (CDI_DOMINATORS,
2364 gimple_bb (SSA_NAME_DEF_STMT (res)),
2365 instantiate_below->dest))
2366 res = chrec_dont_know;
2369 else if (res != chrec_dont_know)
2371 if (inner_loop
2372 && def_bb->loop_father != inner_loop
2373 && !flow_loop_nested_p (def_bb->loop_father, inner_loop))
2374 /* ??? We could try to compute the overall effect of the loop here. */
2375 res = chrec_dont_know;
2376 else
2377 res = instantiate_scev_r (instantiate_below, evolution_loop,
2378 inner_loop, res,
2379 fold_conversions, size_expr);
2382 /* Store the correct value to the cache. */
2383 global_cache->set (si, res);
2384 return res;
2387 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2388 and EVOLUTION_LOOP, that were left under a symbolic form.
2390 CHREC is a polynomial chain of recurrence to be instantiated.
2392 CACHE is the cache of already instantiated values.
2394 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2395 conversions that may wrap in signed/pointer type are folded, as long
2396 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2397 then we don't do such fold.
2399 SIZE_EXPR is used for computing the size of the expression to be
2400 instantiated, and to stop if it exceeds some limit. */
2402 static tree
2403 instantiate_scev_poly (edge instantiate_below,
2404 class loop *evolution_loop, class loop *,
2405 tree chrec, bool *fold_conversions, int size_expr)
2407 tree op1;
2408 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2409 get_chrec_loop (chrec),
2410 CHREC_LEFT (chrec), fold_conversions,
2411 size_expr);
2412 if (op0 == chrec_dont_know)
2413 return chrec_dont_know;
2415 op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2416 get_chrec_loop (chrec),
2417 CHREC_RIGHT (chrec), fold_conversions,
2418 size_expr);
2419 if (op1 == chrec_dont_know)
2420 return chrec_dont_know;
2422 if (CHREC_LEFT (chrec) != op0
2423 || CHREC_RIGHT (chrec) != op1)
2425 op1 = chrec_convert_rhs (chrec_type (op0), op1, NULL);
2426 chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
2429 return chrec;
2432 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2433 and EVOLUTION_LOOP, that were left under a symbolic form.
2435 "C0 CODE C1" is a binary expression of type TYPE to be instantiated.
2437 CACHE is the cache of already instantiated values.
2439 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2440 conversions that may wrap in signed/pointer type are folded, as long
2441 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2442 then we don't do such fold.
2444 SIZE_EXPR is used for computing the size of the expression to be
2445 instantiated, and to stop if it exceeds some limit. */
2447 static tree
2448 instantiate_scev_binary (edge instantiate_below,
2449 class loop *evolution_loop, class loop *inner_loop,
2450 tree chrec, enum tree_code code,
2451 tree type, tree c0, tree c1,
2452 bool *fold_conversions, int size_expr)
2454 tree op1;
2455 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2456 c0, fold_conversions, size_expr);
2457 if (op0 == chrec_dont_know)
2458 return chrec_dont_know;
2460 /* While we eventually compute the same op1 if c0 == c1 the process
2461 of doing this is expensive so the following short-cut prevents
2462 exponential compile-time behavior. */
2463 if (c0 != c1)
2465 op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2466 c1, fold_conversions, size_expr);
2467 if (op1 == chrec_dont_know)
2468 return chrec_dont_know;
2470 else
2471 op1 = op0;
2473 if (c0 != op0
2474 || c1 != op1)
2476 op0 = chrec_convert (type, op0, NULL);
2477 op1 = chrec_convert_rhs (type, op1, NULL);
2479 switch (code)
2481 case POINTER_PLUS_EXPR:
2482 case PLUS_EXPR:
2483 return chrec_fold_plus (type, op0, op1);
2485 case MINUS_EXPR:
2486 return chrec_fold_minus (type, op0, op1);
2488 case MULT_EXPR:
2489 return chrec_fold_multiply (type, op0, op1);
2491 default:
2492 gcc_unreachable ();
2496 return chrec ? chrec : fold_build2 (code, type, c0, c1);
2499 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2500 and EVOLUTION_LOOP, that were left under a symbolic form.
2502 "CHREC" that stands for a convert expression "(TYPE) OP" is to be
2503 instantiated.
2505 CACHE is the cache of already instantiated values.
2507 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2508 conversions that may wrap in signed/pointer type are folded, as long
2509 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2510 then we don't do such fold.
2512 SIZE_EXPR is used for computing the size of the expression to be
2513 instantiated, and to stop if it exceeds some limit. */
2515 static tree
2516 instantiate_scev_convert (edge instantiate_below,
2517 class loop *evolution_loop, class loop *inner_loop,
2518 tree chrec, tree type, tree op,
2519 bool *fold_conversions, int size_expr)
2521 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2522 inner_loop, op,
2523 fold_conversions, size_expr);
2525 if (op0 == chrec_dont_know)
2526 return chrec_dont_know;
2528 if (fold_conversions)
2530 tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
2531 if (tmp)
2532 return tmp;
2534 /* If we used chrec_convert_aggressive, we can no longer assume that
2535 signed chrecs do not overflow, as chrec_convert does, so avoid
2536 calling it in that case. */
2537 if (*fold_conversions)
2539 if (chrec && op0 == op)
2540 return chrec;
2542 return fold_convert (type, op0);
2546 return chrec_convert (type, op0, NULL);
2549 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2550 and EVOLUTION_LOOP, that were left under a symbolic form.
2552 CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated.
2553 Handle ~X as -1 - X.
2554 Handle -X as -1 * X.
2556 CACHE is the cache of already instantiated values.
2558 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2559 conversions that may wrap in signed/pointer type are folded, as long
2560 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2561 then we don't do such fold.
2563 SIZE_EXPR is used for computing the size of the expression to be
2564 instantiated, and to stop if it exceeds some limit. */
2566 static tree
2567 instantiate_scev_not (edge instantiate_below,
2568 class loop *evolution_loop, class loop *inner_loop,
2569 tree chrec,
2570 enum tree_code code, tree type, tree op,
2571 bool *fold_conversions, int size_expr)
2573 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2574 inner_loop, op,
2575 fold_conversions, size_expr);
2577 if (op0 == chrec_dont_know)
2578 return chrec_dont_know;
2580 if (op != op0)
2582 op0 = chrec_convert (type, op0, NULL);
2584 switch (code)
2586 case BIT_NOT_EXPR:
2587 return chrec_fold_minus
2588 (type, fold_convert (type, integer_minus_one_node), op0);
2590 case NEGATE_EXPR:
2591 return chrec_fold_multiply
2592 (type, fold_convert (type, integer_minus_one_node), op0);
2594 default:
2595 gcc_unreachable ();
2599 return chrec ? chrec : fold_build1 (code, type, op0);
2602 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2603 and EVOLUTION_LOOP, that were left under a symbolic form.
2605 CHREC is the scalar evolution to instantiate.
2607 CACHE is the cache of already instantiated values.
2609 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2610 conversions that may wrap in signed/pointer type are folded, as long
2611 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2612 then we don't do such fold.
2614 SIZE_EXPR is used for computing the size of the expression to be
2615 instantiated, and to stop if it exceeds some limit. */
2617 static tree
2618 instantiate_scev_r (edge instantiate_below,
2619 class loop *evolution_loop, class loop *inner_loop,
2620 tree chrec,
2621 bool *fold_conversions, int size_expr)
2623 /* Give up if the expression is larger than the MAX that we allow. */
2624 if (size_expr++ > param_scev_max_expr_size)
2625 return chrec_dont_know;
2627 if (chrec == NULL_TREE
2628 || automatically_generated_chrec_p (chrec)
2629 || is_gimple_min_invariant (chrec))
2630 return chrec;
2632 switch (TREE_CODE (chrec))
2634 case SSA_NAME:
2635 return instantiate_scev_name (instantiate_below, evolution_loop,
2636 inner_loop, chrec,
2637 fold_conversions, size_expr);
2639 case POLYNOMIAL_CHREC:
2640 return instantiate_scev_poly (instantiate_below, evolution_loop,
2641 inner_loop, chrec,
2642 fold_conversions, size_expr);
2644 case POINTER_PLUS_EXPR:
2645 case PLUS_EXPR:
2646 case MINUS_EXPR:
2647 case MULT_EXPR:
2648 return instantiate_scev_binary (instantiate_below, evolution_loop,
2649 inner_loop, chrec,
2650 TREE_CODE (chrec), chrec_type (chrec),
2651 TREE_OPERAND (chrec, 0),
2652 TREE_OPERAND (chrec, 1),
2653 fold_conversions, size_expr);
2655 CASE_CONVERT:
2656 return instantiate_scev_convert (instantiate_below, evolution_loop,
2657 inner_loop, chrec,
2658 TREE_TYPE (chrec), TREE_OPERAND (chrec, 0),
2659 fold_conversions, size_expr);
2661 case NEGATE_EXPR:
2662 case BIT_NOT_EXPR:
2663 return instantiate_scev_not (instantiate_below, evolution_loop,
2664 inner_loop, chrec,
2665 TREE_CODE (chrec), TREE_TYPE (chrec),
2666 TREE_OPERAND (chrec, 0),
2667 fold_conversions, size_expr);
2669 case ADDR_EXPR:
2670 if (is_gimple_min_invariant (chrec))
2671 return chrec;
2672 /* Fallthru. */
2673 case SCEV_NOT_KNOWN:
2674 return chrec_dont_know;
2676 case SCEV_KNOWN:
2677 return chrec_known;
2679 default:
2680 if (CONSTANT_CLASS_P (chrec))
2681 return chrec;
2682 return chrec_dont_know;
2686 /* Analyze all the parameters of the chrec that were left under a
2687 symbolic form. INSTANTIATE_BELOW is the basic block that stops the
2688 recursive instantiation of parameters: a parameter is a variable
2689 that is defined in a basic block that dominates INSTANTIATE_BELOW or
2690 a function parameter. */
2692 tree
2693 instantiate_scev (edge instantiate_below, class loop *evolution_loop,
2694 tree chrec)
2696 tree res;
2698 if (dump_file && (dump_flags & TDF_SCEV))
2700 fprintf (dump_file, "(instantiate_scev \n");
2701 fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
2702 instantiate_below->src->index, instantiate_below->dest->index);
2703 if (evolution_loop)
2704 fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
2705 fprintf (dump_file, " (chrec = ");
2706 print_generic_expr (dump_file, chrec);
2707 fprintf (dump_file, ")\n");
2710 bool destr = false;
2711 if (!global_cache)
2713 global_cache = new instantiate_cache_type;
2714 destr = true;
2717 res = instantiate_scev_r (instantiate_below, evolution_loop,
2718 NULL, chrec, NULL, 0);
2720 if (destr)
2722 delete global_cache;
2723 global_cache = NULL;
2726 if (dump_file && (dump_flags & TDF_SCEV))
2728 fprintf (dump_file, " (res = ");
2729 print_generic_expr (dump_file, res);
2730 fprintf (dump_file, "))\n");
2733 return res;
2736 /* Similar to instantiate_parameters, but does not introduce the
2737 evolutions in outer loops for LOOP invariants in CHREC, and does not
2738 care about causing overflows, as long as they do not affect value
2739 of an expression. */
2741 tree
2742 resolve_mixers (class loop *loop, tree chrec, bool *folded_casts)
2744 bool destr = false;
2745 bool fold_conversions = false;
2746 if (!global_cache)
2748 global_cache = new instantiate_cache_type;
2749 destr = true;
2752 tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL,
2753 chrec, &fold_conversions, 0);
2755 if (folded_casts && !*folded_casts)
2756 *folded_casts = fold_conversions;
2758 if (destr)
2760 delete global_cache;
2761 global_cache = NULL;
2764 return ret;
2767 /* Entry point for the analysis of the number of iterations pass.
2768 This function tries to safely approximate the number of iterations
2769 the loop will run. When this property is not decidable at compile
2770 time, the result is chrec_dont_know. Otherwise the result is a
2771 scalar or a symbolic parameter. When the number of iterations may
2772 be equal to zero and the property cannot be determined at compile
2773 time, the result is a COND_EXPR that represents in a symbolic form
2774 the conditions under which the number of iterations is not zero.
2776 Example of analysis: suppose that the loop has an exit condition:
2778 "if (b > 49) goto end_loop;"
2780 and that in a previous analysis we have determined that the
2781 variable 'b' has an evolution function:
2783 "EF = {23, +, 5}_2".
2785 When we evaluate the function at the point 5, i.e. the value of the
2786 variable 'b' after 5 iterations in the loop, we have EF (5) = 48,
2787 and EF (6) = 53. In this case the value of 'b' on exit is '53' and
2788 the loop body has been executed 6 times. */
2790 tree
2791 number_of_latch_executions (class loop *loop)
2793 edge exit;
2794 class tree_niter_desc niter_desc;
2795 tree may_be_zero;
2796 tree res;
2798 /* Determine whether the number of iterations in loop has already
2799 been computed. */
2800 res = loop->nb_iterations;
2801 if (res)
2802 return res;
2804 may_be_zero = NULL_TREE;
2806 if (dump_file && (dump_flags & TDF_SCEV))
2807 fprintf (dump_file, "(number_of_iterations_in_loop = \n");
2809 res = chrec_dont_know;
2810 exit = single_exit (loop);
2812 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
2814 may_be_zero = niter_desc.may_be_zero;
2815 res = niter_desc.niter;
2818 if (res == chrec_dont_know
2819 || !may_be_zero
2820 || integer_zerop (may_be_zero))
2822 else if (integer_nonzerop (may_be_zero))
2823 res = build_int_cst (TREE_TYPE (res), 0);
2825 else if (COMPARISON_CLASS_P (may_be_zero))
2826 res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero,
2827 build_int_cst (TREE_TYPE (res), 0), res);
2828 else
2829 res = chrec_dont_know;
2831 if (dump_file && (dump_flags & TDF_SCEV))
2833 fprintf (dump_file, " (set_nb_iterations_in_loop = ");
2834 print_generic_expr (dump_file, res);
2835 fprintf (dump_file, "))\n");
2838 loop->nb_iterations = res;
2839 return res;
2843 /* Counters for the stats. */
2845 struct chrec_stats
2847 unsigned nb_chrecs;
2848 unsigned nb_affine;
2849 unsigned nb_affine_multivar;
2850 unsigned nb_higher_poly;
2851 unsigned nb_chrec_dont_know;
2852 unsigned nb_undetermined;
2855 /* Reset the counters. */
2857 static inline void
2858 reset_chrecs_counters (struct chrec_stats *stats)
2860 stats->nb_chrecs = 0;
2861 stats->nb_affine = 0;
2862 stats->nb_affine_multivar = 0;
2863 stats->nb_higher_poly = 0;
2864 stats->nb_chrec_dont_know = 0;
2865 stats->nb_undetermined = 0;
2868 /* Dump the contents of a CHREC_STATS structure. */
2870 static void
2871 dump_chrecs_stats (FILE *file, struct chrec_stats *stats)
2873 fprintf (file, "\n(\n");
2874 fprintf (file, "-----------------------------------------\n");
2875 fprintf (file, "%d\taffine univariate chrecs\n", stats->nb_affine);
2876 fprintf (file, "%d\taffine multivariate chrecs\n", stats->nb_affine_multivar);
2877 fprintf (file, "%d\tdegree greater than 2 polynomials\n",
2878 stats->nb_higher_poly);
2879 fprintf (file, "%d\tchrec_dont_know chrecs\n", stats->nb_chrec_dont_know);
2880 fprintf (file, "-----------------------------------------\n");
2881 fprintf (file, "%d\ttotal chrecs\n", stats->nb_chrecs);
2882 fprintf (file, "%d\twith undetermined coefficients\n",
2883 stats->nb_undetermined);
2884 fprintf (file, "-----------------------------------------\n");
2885 fprintf (file, "%d\tchrecs in the scev database\n",
2886 (int) scalar_evolution_info->elements ());
2887 fprintf (file, "%d\tsets in the scev database\n", nb_set_scev);
2888 fprintf (file, "%d\tgets in the scev database\n", nb_get_scev);
2889 fprintf (file, "-----------------------------------------\n");
2890 fprintf (file, ")\n\n");
2893 /* Gather statistics about CHREC. */
2895 static void
2896 gather_chrec_stats (tree chrec, struct chrec_stats *stats)
2898 if (dump_file && (dump_flags & TDF_STATS))
2900 fprintf (dump_file, "(classify_chrec ");
2901 print_generic_expr (dump_file, chrec);
2902 fprintf (dump_file, "\n");
2905 stats->nb_chrecs++;
2907 if (chrec == NULL_TREE)
2909 stats->nb_undetermined++;
2910 return;
2913 switch (TREE_CODE (chrec))
2915 case POLYNOMIAL_CHREC:
2916 if (evolution_function_is_affine_p (chrec))
2918 if (dump_file && (dump_flags & TDF_STATS))
2919 fprintf (dump_file, " affine_univariate\n");
2920 stats->nb_affine++;
2922 else if (evolution_function_is_affine_multivariate_p (chrec, 0))
2924 if (dump_file && (dump_flags & TDF_STATS))
2925 fprintf (dump_file, " affine_multivariate\n");
2926 stats->nb_affine_multivar++;
2928 else
2930 if (dump_file && (dump_flags & TDF_STATS))
2931 fprintf (dump_file, " higher_degree_polynomial\n");
2932 stats->nb_higher_poly++;
2935 break;
2937 default:
2938 break;
2941 if (chrec_contains_undetermined (chrec))
2943 if (dump_file && (dump_flags & TDF_STATS))
2944 fprintf (dump_file, " undetermined\n");
2945 stats->nb_undetermined++;
2948 if (dump_file && (dump_flags & TDF_STATS))
2949 fprintf (dump_file, ")\n");
2952 /* Classify the chrecs of the whole database. */
2954 void
2955 gather_stats_on_scev_database (void)
2957 struct chrec_stats stats;
2959 if (!dump_file)
2960 return;
2962 reset_chrecs_counters (&stats);
2964 hash_table<scev_info_hasher>::iterator iter;
2965 scev_info_str *elt;
2966 FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *,
2967 iter)
2968 gather_chrec_stats (elt->chrec, &stats);
2970 dump_chrecs_stats (dump_file, &stats);
2974 /* Initialize the analysis of scalar evolutions for LOOPS. */
2976 void
2977 scev_initialize (void)
2979 gcc_assert (! scev_initialized_p ()
2980 && loops_state_satisfies_p (cfun, LOOPS_NORMAL));
2982 scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100);
2984 for (auto loop : loops_list (cfun, 0))
2985 loop->nb_iterations = NULL_TREE;
2988 /* Return true if SCEV is initialized. */
2990 bool
2991 scev_initialized_p (void)
2993 return scalar_evolution_info != NULL;
2996 /* Cleans up the information cached by the scalar evolutions analysis
2997 in the hash table. */
2999 void
3000 scev_reset_htab (void)
3002 if (!scalar_evolution_info)
3003 return;
3005 scalar_evolution_info->empty ();
3008 /* Cleans up the information cached by the scalar evolutions analysis
3009 in the hash table and in the loop->nb_iterations. */
3011 void
3012 scev_reset (void)
3014 scev_reset_htab ();
3016 for (auto loop : loops_list (cfun, 0))
3017 loop->nb_iterations = NULL_TREE;
3020 /* Return true if the IV calculation in TYPE can overflow based on the knowledge
3021 of the upper bound on the number of iterations of LOOP, the BASE and STEP
3022 of IV.
3024 We do not use information whether TYPE can overflow so it is safe to
3025 use this test even for derived IVs not computed every iteration or
3026 hypotetical IVs to be inserted into code. */
3028 bool
3029 iv_can_overflow_p (class loop *loop, tree type, tree base, tree step)
3031 widest_int nit;
3032 wide_int base_min, base_max, step_min, step_max, type_min, type_max;
3033 signop sgn = TYPE_SIGN (type);
3034 value_range r;
3036 if (integer_zerop (step))
3037 return false;
3039 if (!INTEGRAL_TYPE_P (TREE_TYPE (base))
3040 || !get_range_query (cfun)->range_of_expr (r, base)
3041 || r.kind () != VR_RANGE)
3042 return true;
3044 base_min = r.lower_bound ();
3045 base_max = r.upper_bound ();
3047 if (!INTEGRAL_TYPE_P (TREE_TYPE (step))
3048 || !get_range_query (cfun)->range_of_expr (r, step)
3049 || r.kind () != VR_RANGE)
3050 return true;
3052 step_min = r.lower_bound ();
3053 step_max = r.upper_bound ();
3055 if (!get_max_loop_iterations (loop, &nit))
3056 return true;
3058 type_min = wi::min_value (type);
3059 type_max = wi::max_value (type);
3061 /* Just sanity check that we don't see values out of the range of the type.
3062 In this case the arithmetics bellow would overflow. */
3063 gcc_checking_assert (wi::ge_p (base_min, type_min, sgn)
3064 && wi::le_p (base_max, type_max, sgn));
3066 /* Account the possible increment in the last ieration. */
3067 wi::overflow_type overflow = wi::OVF_NONE;
3068 nit = wi::add (nit, 1, SIGNED, &overflow);
3069 if (overflow)
3070 return true;
3072 /* NIT is typeless and can exceed the precision of the type. In this case
3073 overflow is always possible, because we know STEP is non-zero. */
3074 if (wi::min_precision (nit, UNSIGNED) > TYPE_PRECISION (type))
3075 return true;
3076 wide_int nit2 = wide_int::from (nit, TYPE_PRECISION (type), UNSIGNED);
3078 /* If step can be positive, check that nit*step <= type_max-base.
3079 This can be done by unsigned arithmetic and we only need to watch overflow
3080 in the multiplication. The right hand side can always be represented in
3081 the type. */
3082 if (sgn == UNSIGNED || !wi::neg_p (step_max))
3084 wi::overflow_type overflow = wi::OVF_NONE;
3085 if (wi::gtu_p (wi::mul (step_max, nit2, UNSIGNED, &overflow),
3086 type_max - base_max)
3087 || overflow)
3088 return true;
3090 /* If step can be negative, check that nit*(-step) <= base_min-type_min. */
3091 if (sgn == SIGNED && wi::neg_p (step_min))
3093 wi::overflow_type overflow, overflow2;
3094 overflow = overflow2 = wi::OVF_NONE;
3095 if (wi::gtu_p (wi::mul (wi::neg (step_min, &overflow2),
3096 nit2, UNSIGNED, &overflow),
3097 base_min - type_min)
3098 || overflow || overflow2)
3099 return true;
3102 return false;
3105 /* Given EV with form of "(type) {inner_base, inner_step}_loop", this
3106 function tries to derive condition under which it can be simplified
3107 into "{(type)inner_base, (type)inner_step}_loop". The condition is
3108 the maximum number that inner iv can iterate. */
3110 static tree
3111 derive_simple_iv_with_niters (tree ev, tree *niters)
3113 if (!CONVERT_EXPR_P (ev))
3114 return ev;
3116 tree inner_ev = TREE_OPERAND (ev, 0);
3117 if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC)
3118 return ev;
3120 tree init = CHREC_LEFT (inner_ev);
3121 tree step = CHREC_RIGHT (inner_ev);
3122 if (TREE_CODE (init) != INTEGER_CST
3123 || TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3124 return ev;
3126 tree type = TREE_TYPE (ev);
3127 tree inner_type = TREE_TYPE (inner_ev);
3128 if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type))
3129 return ev;
3131 /* Type conversion in "(type) {inner_base, inner_step}_loop" can be
3132 folded only if inner iv won't overflow. We compute the maximum
3133 number the inner iv can iterate before overflowing and return the
3134 simplified affine iv. */
3135 tree delta;
3136 init = fold_convert (type, init);
3137 step = fold_convert (type, step);
3138 ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step);
3139 if (tree_int_cst_sign_bit (step))
3141 tree bound = lower_bound_in_type (inner_type, inner_type);
3142 delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound));
3143 step = fold_build1 (NEGATE_EXPR, type, step);
3145 else
3147 tree bound = upper_bound_in_type (inner_type, inner_type);
3148 delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init);
3150 *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step);
3151 return ev;
3154 /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with
3155 respect to WRTO_LOOP and returns its base and step in IV if possible
3156 (see analyze_scalar_evolution_in_loop for more details on USE_LOOP
3157 and WRTO_LOOP). If ALLOW_NONCONSTANT_STEP is true, we want step to be
3158 invariant in LOOP. Otherwise we require it to be an integer constant.
3160 IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g.
3161 because it is computed in signed arithmetics). Consequently, adding an
3162 induction variable
3164 for (i = IV->base; ; i += IV->step)
3166 is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is
3167 false for the type of the induction variable, or you can prove that i does
3168 not wrap by some other argument. Otherwise, this might introduce undefined
3169 behavior, and
3171 i = iv->base;
3172 for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step))
3174 must be used instead.
3176 When IV_NITERS is not NULL, this function also checks case in which OP
3177 is a conversion of an inner simple iv of below form:
3179 (outer_type){inner_base, inner_step}_loop.
3181 If type of inner iv has smaller precision than outer_type, it can't be
3182 folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because
3183 the inner iv could overflow/wrap. In this case, we derive a condition
3184 under which the inner iv won't overflow/wrap and do the simplification.
3185 The derived condition normally is the maximum number the inner iv can
3186 iterate, and will be stored in IV_NITERS. This is useful in loop niter
3187 analysis, to derive break conditions when a loop must terminate, when is
3188 infinite. */
3190 bool
3191 simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop,
3192 tree op, affine_iv *iv, tree *iv_niters,
3193 bool allow_nonconstant_step)
3195 enum tree_code code;
3196 tree type, ev, base, e;
3197 wide_int extreme;
3198 bool folded_casts;
3200 iv->base = NULL_TREE;
3201 iv->step = NULL_TREE;
3202 iv->no_overflow = false;
3204 type = TREE_TYPE (op);
3205 if (!POINTER_TYPE_P (type)
3206 && !INTEGRAL_TYPE_P (type))
3207 return false;
3209 ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, op,
3210 &folded_casts);
3211 if (chrec_contains_undetermined (ev)
3212 || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num))
3213 return false;
3215 if (tree_does_not_contain_chrecs (ev))
3217 iv->base = ev;
3218 iv->step = build_int_cst (TREE_TYPE (ev), 0);
3219 iv->no_overflow = true;
3220 return true;
3223 /* If we can derive valid scalar evolution with assumptions. */
3224 if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC)
3225 ev = derive_simple_iv_with_niters (ev, iv_niters);
3227 if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
3228 return false;
3230 if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
3231 return false;
3233 iv->step = CHREC_RIGHT (ev);
3234 if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST)
3235 || tree_contains_chrecs (iv->step, NULL))
3236 return false;
3238 iv->base = CHREC_LEFT (ev);
3239 if (tree_contains_chrecs (iv->base, NULL))
3240 return false;
3242 iv->no_overflow = !folded_casts && nowrap_type_p (type);
3244 if (!iv->no_overflow
3245 && !iv_can_overflow_p (wrto_loop, type, iv->base, iv->step))
3246 iv->no_overflow = true;
3248 /* Try to simplify iv base:
3250 (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T
3251 == (signed T)(unsigned T)base + step
3252 == base + step
3254 If we can prove operation (base + step) doesn't overflow or underflow.
3255 Specifically, we try to prove below conditions are satisfied:
3257 base <= UPPER_BOUND (type) - step ;;step > 0
3258 base >= LOWER_BOUND (type) - step ;;step < 0
3260 This is done by proving the reverse conditions are false using loop's
3261 initial conditions.
3263 The is necessary to make loop niter, or iv overflow analysis easier
3264 for below example:
3266 int foo (int *a, signed char s, signed char l)
3268 signed char i;
3269 for (i = s; i < l; i++)
3270 a[i] = 0;
3271 return 0;
3274 Note variable I is firstly converted to type unsigned char, incremented,
3275 then converted back to type signed char. */
3277 if (wrto_loop->num != use_loop->num)
3278 return true;
3280 if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST)
3281 return true;
3283 type = TREE_TYPE (iv->base);
3284 e = TREE_OPERAND (iv->base, 0);
3285 if (TREE_CODE (e) != PLUS_EXPR
3286 || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
3287 || !tree_int_cst_equal (iv->step,
3288 fold_convert (type, TREE_OPERAND (e, 1))))
3289 return true;
3290 e = TREE_OPERAND (e, 0);
3291 if (!CONVERT_EXPR_P (e))
3292 return true;
3293 base = TREE_OPERAND (e, 0);
3294 if (!useless_type_conversion_p (type, TREE_TYPE (base)))
3295 return true;
3297 if (tree_int_cst_sign_bit (iv->step))
3299 code = LT_EXPR;
3300 extreme = wi::min_value (type);
3302 else
3304 code = GT_EXPR;
3305 extreme = wi::max_value (type);
3307 wi::overflow_type overflow = wi::OVF_NONE;
3308 extreme = wi::sub (extreme, wi::to_wide (iv->step),
3309 TYPE_SIGN (type), &overflow);
3310 if (overflow)
3311 return true;
3312 e = fold_build2 (code, boolean_type_node, base,
3313 wide_int_to_tree (type, extreme));
3314 e = simplify_using_initial_conditions (use_loop, e);
3315 if (!integer_zerop (e))
3316 return true;
3318 if (POINTER_TYPE_P (TREE_TYPE (base)))
3319 code = POINTER_PLUS_EXPR;
3320 else
3321 code = PLUS_EXPR;
3323 iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step);
3324 return true;
3327 /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple
3328 affine iv unconditionally. */
3330 bool
3331 simple_iv (class loop *wrto_loop, class loop *use_loop, tree op,
3332 affine_iv *iv, bool allow_nonconstant_step)
3334 return simple_iv_with_niters (wrto_loop, use_loop, op, iv,
3335 NULL, allow_nonconstant_step);
3338 /* Finalize the scalar evolution analysis. */
3340 void
3341 scev_finalize (void)
3343 if (!scalar_evolution_info)
3344 return;
3345 scalar_evolution_info->empty ();
3346 scalar_evolution_info = NULL;
3347 free_numbers_of_iterations_estimates (cfun);
3350 /* Returns true if the expression EXPR is considered to be too expensive
3351 for scev_const_prop. */
3353 static bool
3354 expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache,
3355 uint64_t &cost)
3357 enum tree_code code;
3359 if (is_gimple_val (expr))
3360 return false;
3362 code = TREE_CODE (expr);
3363 if (code == TRUNC_DIV_EXPR
3364 || code == CEIL_DIV_EXPR
3365 || code == FLOOR_DIV_EXPR
3366 || code == ROUND_DIV_EXPR
3367 || code == TRUNC_MOD_EXPR
3368 || code == CEIL_MOD_EXPR
3369 || code == FLOOR_MOD_EXPR
3370 || code == ROUND_MOD_EXPR
3371 || code == EXACT_DIV_EXPR)
3373 /* Division by power of two is usually cheap, so we allow it.
3374 Forbid anything else. */
3375 if (!integer_pow2p (TREE_OPERAND (expr, 1)))
3376 return true;
3379 bool visited_p;
3380 uint64_t &local_cost = cache.get_or_insert (expr, &visited_p);
3381 if (visited_p)
3383 uint64_t tem = cost + local_cost;
3384 if (tem < cost)
3385 return true;
3386 cost = tem;
3387 return false;
3389 local_cost = 1;
3391 uint64_t op_cost = 0;
3392 if (code == CALL_EXPR)
3394 tree arg;
3395 call_expr_arg_iterator iter;
3396 /* Even though is_inexpensive_builtin might say true, we will get a
3397 library call for popcount when backend does not have an instruction
3398 to do so. We consider this to be expensive and generate
3399 __builtin_popcount only when backend defines it. */
3400 optab optab;
3401 combined_fn cfn = get_call_combined_fn (expr);
3402 switch (cfn)
3404 CASE_CFN_POPCOUNT:
3405 optab = popcount_optab;
3406 goto bitcount_call;
3407 CASE_CFN_CLZ:
3408 optab = clz_optab;
3409 goto bitcount_call;
3410 CASE_CFN_CTZ:
3411 optab = ctz_optab;
3412 bitcount_call:
3413 /* Check if opcode for popcount is available in the mode required. */
3414 if (optab_handler (optab,
3415 TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
3416 == CODE_FOR_nothing)
3418 machine_mode mode;
3419 mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)));
3420 scalar_int_mode int_mode;
3422 /* If the mode is of 2 * UNITS_PER_WORD size, we can handle
3423 double-word popcount by emitting two single-word popcount
3424 instructions. */
3425 if (is_a <scalar_int_mode> (mode, &int_mode)
3426 && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
3427 && (optab_handler (optab, word_mode)
3428 != CODE_FOR_nothing))
3429 break;
3430 return true;
3432 break;
3434 default:
3435 if (cfn == CFN_LAST
3436 || !is_inexpensive_builtin (get_callee_fndecl (expr)))
3437 return true;
3438 break;
3441 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
3442 if (expression_expensive_p (arg, cache, op_cost))
3443 return true;
3444 *cache.get (expr) += op_cost;
3445 cost += op_cost + 1;
3446 return false;
3449 if (code == COND_EXPR)
3451 if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost)
3452 || (EXPR_P (TREE_OPERAND (expr, 1))
3453 && EXPR_P (TREE_OPERAND (expr, 2)))
3454 /* If either branch has side effects or could trap. */
3455 || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 1))
3456 || generic_expr_could_trap_p (TREE_OPERAND (expr, 1))
3457 || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 0))
3458 || generic_expr_could_trap_p (TREE_OPERAND (expr, 0))
3459 || expression_expensive_p (TREE_OPERAND (expr, 1),
3460 cache, op_cost)
3461 || expression_expensive_p (TREE_OPERAND (expr, 2),
3462 cache, op_cost))
3463 return true;
3464 *cache.get (expr) += op_cost;
3465 cost += op_cost + 1;
3466 return false;
3469 switch (TREE_CODE_CLASS (code))
3471 case tcc_binary:
3472 case tcc_comparison:
3473 if (expression_expensive_p (TREE_OPERAND (expr, 1), cache, op_cost))
3474 return true;
3476 /* Fallthru. */
3477 case tcc_unary:
3478 if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost))
3479 return true;
3480 *cache.get (expr) += op_cost;
3481 cost += op_cost + 1;
3482 return false;
3484 default:
3485 return true;
3489 bool
3490 expression_expensive_p (tree expr)
3492 hash_map<tree, uint64_t> cache;
3493 uint64_t expanded_size = 0;
3494 return (expression_expensive_p (expr, cache, expanded_size)
3495 || expanded_size > cache.elements ());
3498 /* Match.pd function to match bitwise inductive expression.
3499 .i.e.
3500 _2 = 1 << _1;
3501 _3 = ~_2;
3502 tmp_9 = _3 & tmp_12; */
3503 extern bool gimple_bitwise_induction_p (tree, tree *, tree (*)(tree));
3505 /* Return the inductive expression of bitwise operation if possible,
3506 otherwise returns DEF. */
3507 static tree
3508 analyze_and_compute_bitwise_induction_effect (class loop* loop,
3509 tree phidef,
3510 unsigned HOST_WIDE_INT niter)
3512 tree match_op[3],inv, bitwise_scev;
3513 tree type = TREE_TYPE (phidef);
3514 gphi* header_phi = NULL;
3516 /* Match things like op2(MATCH_OP[2]), op1(MATCH_OP[1]), phidef(PHIDEF)
3518 op2 = PHI <phidef, inv>
3519 _1 = (int) bit_17;
3520 _3 = 1 << _1;
3521 op1 = ~_3;
3522 phidef = op1 & op2; */
3523 if (!gimple_bitwise_induction_p (phidef, &match_op[0], NULL)
3524 || TREE_CODE (match_op[2]) != SSA_NAME
3525 || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[2])))
3526 || gimple_phi_num_args (header_phi) != 2)
3527 return NULL_TREE;
3529 if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3530 return NULL_TREE;
3532 bitwise_scev = analyze_scalar_evolution (loop, match_op[1]);
3533 bitwise_scev = instantiate_parameters (loop, bitwise_scev);
3535 /* Make sure bits is in range of type precision. */
3536 if (TREE_CODE (bitwise_scev) != POLYNOMIAL_CHREC
3537 || !INTEGRAL_TYPE_P (TREE_TYPE (bitwise_scev))
3538 || !tree_fits_uhwi_p (CHREC_LEFT (bitwise_scev))
3539 || tree_to_uhwi (CHREC_LEFT (bitwise_scev)) >= TYPE_PRECISION (type)
3540 || !tree_fits_shwi_p (CHREC_RIGHT (bitwise_scev)))
3541 return NULL_TREE;
3543 enum bit_op_kind
3545 INDUCTION_BIT_CLEAR,
3546 INDUCTION_BIT_IOR,
3547 INDUCTION_BIT_XOR,
3548 INDUCTION_BIT_RESET,
3549 INDUCTION_ZERO,
3550 INDUCTION_ALL
3553 enum bit_op_kind induction_kind;
3554 enum tree_code code1
3555 = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef));
3556 enum tree_code code2
3557 = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (match_op[0]));
3559 /* BIT_CLEAR: A &= ~(1 << bit)
3560 BIT_RESET: A ^= (1 << bit).
3561 BIT_IOR: A |= (1 << bit)
3562 BIT_ZERO: A &= (1 << bit)
3563 BIT_ALL: A |= ~(1 << bit)
3564 BIT_XOR: A ^= ~(1 << bit).
3565 bit is induction variable. */
3566 switch (code1)
3568 case BIT_AND_EXPR:
3569 induction_kind = code2 == BIT_NOT_EXPR
3570 ? INDUCTION_BIT_CLEAR
3571 : INDUCTION_ZERO;
3572 break;
3573 case BIT_IOR_EXPR:
3574 induction_kind = code2 == BIT_NOT_EXPR
3575 ? INDUCTION_ALL
3576 : INDUCTION_BIT_IOR;
3577 break;
3578 case BIT_XOR_EXPR:
3579 induction_kind = code2 == BIT_NOT_EXPR
3580 ? INDUCTION_BIT_XOR
3581 : INDUCTION_BIT_RESET;
3582 break;
3583 /* A ^ ~(1 << bit) is equal to ~(A ^ (1 << bit)). */
3584 case BIT_NOT_EXPR:
3585 gcc_assert (code2 == BIT_XOR_EXPR);
3586 induction_kind = INDUCTION_BIT_XOR;
3587 break;
3588 default:
3589 gcc_unreachable ();
3592 if (induction_kind == INDUCTION_ZERO)
3593 return build_zero_cst (type);
3594 if (induction_kind == INDUCTION_ALL)
3595 return build_all_ones_cst (type);
3597 wide_int bits = wi::zero (TYPE_PRECISION (type));
3598 HOST_WIDE_INT bit_start = tree_to_shwi (CHREC_LEFT (bitwise_scev));
3599 HOST_WIDE_INT step = tree_to_shwi (CHREC_RIGHT (bitwise_scev));
3600 HOST_WIDE_INT bit_final = bit_start + step * niter;
3602 /* bit_start, bit_final in range of [0,TYPE_PRECISION)
3603 implies all bits are set in range. */
3604 if (bit_final >= TYPE_PRECISION (type)
3605 || bit_final < 0)
3606 return NULL_TREE;
3608 /* Loop tripcount should be niter + 1. */
3609 for (unsigned i = 0; i != niter + 1; i++)
3611 bits = wi::set_bit (bits, bit_start);
3612 bit_start += step;
3615 bool inverted = false;
3616 switch (induction_kind)
3618 case INDUCTION_BIT_CLEAR:
3619 code1 = BIT_AND_EXPR;
3620 inverted = true;
3621 break;
3622 case INDUCTION_BIT_IOR:
3623 code1 = BIT_IOR_EXPR;
3624 break;
3625 case INDUCTION_BIT_RESET:
3626 code1 = BIT_XOR_EXPR;
3627 break;
3628 /* A ^= ~(1 << bit) is special, when loop tripcount is even,
3629 it's equal to A ^= bits, else A ^= ~bits. */
3630 case INDUCTION_BIT_XOR:
3631 code1 = BIT_XOR_EXPR;
3632 if (niter % 2 == 0)
3633 inverted = true;
3634 break;
3635 default:
3636 gcc_unreachable ();
3639 if (inverted)
3640 bits = wi::bit_not (bits);
3642 inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3643 return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits));
3646 /* Match.pd function to match bitop with invariant expression
3647 .i.e.
3648 tmp_7 = _0 & _1; */
3649 extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree));
3651 /* Return the inductive expression of bitop with invariant if possible,
3652 otherwise returns DEF. */
3653 static tree
3654 analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
3655 tree niter)
3657 tree match_op[2],inv;
3658 tree type = TREE_TYPE (phidef);
3659 gphi* header_phi = NULL;
3660 enum tree_code code;
3661 /* match thing like op0 (match[0]), op1 (match[1]), phidef (PHIDEF)
3663 op1 = PHI <phidef, inv>
3664 phidef = op0 & op1
3665 if op0 is an invariant, it could change to
3666 phidef = op0 & inv. */
3667 gimple *def;
3668 def = SSA_NAME_DEF_STMT (phidef);
3669 if (!(is_gimple_assign (def)
3670 && ((code = gimple_assign_rhs_code (def)), true)
3671 && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR
3672 || code == BIT_XOR_EXPR)))
3673 return NULL_TREE;
3675 match_op[0] = gimple_assign_rhs1 (def);
3676 match_op[1] = gimple_assign_rhs2 (def);
3678 if (TREE_CODE (match_op[1]) != SSA_NAME
3679 || !expr_invariant_in_loop_p (loop, match_op[0])
3680 || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1])))
3681 || gimple_phi_num_args (header_phi) != 2)
3682 return NULL_TREE;
3684 if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3685 return NULL_TREE;
3687 enum tree_code code1
3688 = gimple_assign_rhs_code (def);
3690 if (code1 == BIT_XOR_EXPR)
3692 if (!tree_fits_uhwi_p (niter))
3693 return NULL_TREE;
3694 unsigned HOST_WIDE_INT niter_num;
3695 niter_num = tree_to_uhwi (niter);
3696 if (niter_num % 2 != 0)
3697 match_op[0] = build_zero_cst (type);
3700 inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3701 return fold_build2 (code1, type, inv, match_op[0]);
3704 /* Do final value replacement for LOOP, return true if we did anything. */
3706 bool
3707 final_value_replacement_loop (class loop *loop)
3709 /* If we do not know exact number of iterations of the loop, we cannot
3710 replace the final value. */
3711 edge exit = single_exit (loop);
3712 if (!exit)
3713 return false;
3715 tree niter = number_of_latch_executions (loop);
3716 if (niter == chrec_dont_know)
3717 return false;
3719 /* Ensure that it is possible to insert new statements somewhere. */
3720 if (!single_pred_p (exit->dest))
3721 split_loop_exit_edge (exit);
3723 /* Set stmt insertion pointer. All stmts are inserted before this point. */
3724 gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
3726 class loop *ex_loop
3727 = superloop_at_depth (loop,
3728 loop_depth (exit->dest->loop_father) + 1);
3730 bool any = false;
3731 gphi_iterator psi;
3732 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
3734 gphi *phi = psi.phi ();
3735 tree rslt = PHI_RESULT (phi);
3736 tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3737 tree def = phidef;
3738 if (virtual_operand_p (def))
3740 gsi_next (&psi);
3741 continue;
3744 if (!POINTER_TYPE_P (TREE_TYPE (def))
3745 && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
3747 gsi_next (&psi);
3748 continue;
3751 bool folded_casts;
3752 def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
3753 &folded_casts);
3755 tree bitinv_def, bit_def;
3756 unsigned HOST_WIDE_INT niter_num;
3758 if (def != chrec_dont_know)
3759 def = compute_overall_effect_of_inner_loop (ex_loop, def);
3761 /* Handle bitop with invariant induction expression.
3763 .i.e
3764 for (int i =0 ;i < 32; i++)
3765 tmp &= bit2;
3766 if bit2 is an invariant in loop which could simple to
3767 tmp &= bit2. */
3768 else if ((bitinv_def
3769 = analyze_and_compute_bitop_with_inv_effect (loop,
3770 phidef, niter)))
3771 def = bitinv_def;
3773 /* Handle bitwise induction expression.
3775 .i.e.
3776 for (int i = 0; i != 64; i+=3)
3777 res &= ~(1UL << i);
3779 RES can't be analyzed out by SCEV because it is not polynomially
3780 expressible, but in fact final value of RES can be replaced by
3781 RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
3782 being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */
3783 else if (tree_fits_uhwi_p (niter)
3784 && (niter_num = tree_to_uhwi (niter)) != 0
3785 && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
3786 && (bit_def
3787 = analyze_and_compute_bitwise_induction_effect (loop,
3788 phidef,
3789 niter_num)))
3790 def = bit_def;
3792 if (!tree_does_not_contain_chrecs (def)
3793 || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
3794 /* Moving the computation from the loop may prolong life range
3795 of some ssa names, which may cause problems if they appear
3796 on abnormal edges. */
3797 || contains_abnormal_ssa_name_p (def)
3798 /* Do not emit expensive expressions. The rationale is that
3799 when someone writes a code like
3801 while (n > 45) n -= 45;
3803 he probably knows that n is not large, and does not want it
3804 to be turned into n %= 45. */
3805 || expression_expensive_p (def))
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3809 fprintf (dump_file, "not replacing:\n ");
3810 print_gimple_stmt (dump_file, phi, 0);
3811 fprintf (dump_file, "\n");
3813 gsi_next (&psi);
3814 continue;
3817 /* Eliminate the PHI node and replace it by a computation outside
3818 the loop. */
3819 if (dump_file)
3821 fprintf (dump_file, "\nfinal value replacement:\n ");
3822 print_gimple_stmt (dump_file, phi, 0);
3823 fprintf (dump_file, " with expr: ");
3824 print_generic_expr (dump_file, def);
3826 any = true;
3827 def = unshare_expr (def);
3828 remove_phi_node (&psi, false);
3830 /* If def's type has undefined overflow and there were folded
3831 casts, rewrite all stmts added for def into arithmetics
3832 with defined overflow behavior. */
3833 if (folded_casts && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
3834 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
3836 gimple_seq stmts;
3837 gimple_stmt_iterator gsi2;
3838 def = force_gimple_operand (def, &stmts, true, NULL_TREE);
3839 gsi2 = gsi_start (stmts);
3840 while (!gsi_end_p (gsi2))
3842 gimple *stmt = gsi_stmt (gsi2);
3843 gimple_stmt_iterator gsi3 = gsi2;
3844 gsi_next (&gsi2);
3845 gsi_remove (&gsi3, false);
3846 if (is_gimple_assign (stmt)
3847 && arith_code_with_undefined_signed_overflow
3848 (gimple_assign_rhs_code (stmt)))
3849 gsi_insert_seq_before (&gsi,
3850 rewrite_to_defined_overflow (stmt),
3851 GSI_SAME_STMT);
3852 else
3853 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3856 else
3857 def = force_gimple_operand_gsi (&gsi, def, false, NULL_TREE,
3858 true, GSI_SAME_STMT);
3860 gassign *ass = gimple_build_assign (rslt, def);
3861 gimple_set_location (ass,
3862 gimple_phi_arg_location (phi, exit->dest_idx));
3863 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
3864 if (dump_file)
3866 fprintf (dump_file, "\n final stmt:\n ");
3867 print_gimple_stmt (dump_file, ass, 0);
3868 fprintf (dump_file, "\n");
3872 return any;
3875 #include "gt-tree-scalar-evolution.h"