/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

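/* As an illustrative sketch of the transformation this pass aims at
   (an addition for exposition; the loop below is hypothetical and not
   part of the original source), strength reduction rewrites

       for (i = 0; i < n; i++)
	 a[i] = 0;

   where each iteration computes the address "&a + i * 4" (assuming
   4-byte elements), into the equivalent

       for (p = &a[0]; p < &a[n]; p++)
	 *p = 0;

   so the multiplication hidden in the array reference is replaced by a
   cheap pointer increment, and "i" can be eliminated entirely if it
   has no other uses.  */
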
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

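/* For instance (a hypothetical illustration, not from the original
   source): if profile feedback estimates that the loop body executes
   100 times, avg_loop_niter returns 100; without any estimate it falls
   back to the AVG_LOOP_NITER default of 5.  */
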
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object that the induction variable
			   points to.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use recorded for it,
			   if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicates whether it's used in any
			   address type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

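/* A sketch of how these constants are meant to combine (exposition
   only; the concrete numbers are made up): a candidate whose use costs
   {4, 1} is preferred over one costing {5, 1}, while complexity serves
   to discriminate between computations of equal runtime cost, e.g.
   {4, 1} over {4, 3}.  */
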
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

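/* For example (an illustrative sketch, not from the original source):
   for EXPR "&a[0] + i * 4" the recursion walks through the
   POINTER_PLUS_EXPR to the ADDR_EXPR operand and returns "&a", while
   for a pointer of unknown provenance, such as a function parameter p,
   the default case returns p itself converted to ptr_type_node.  */
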
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that is expensive
	 and such cases are unusual, we just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that is expensive
     and such cases are unusual, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}

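/* A sketch of the tracing above (illustrative, not from the original
   source): given a biv "i" and the statements

       j_1 = i_2 * 4;
       p_3 = base_4 + j_1;

   calling find_deriving_biv_for_expr on p_3 walks p_3 -> j_1 -> i_2
   and returns the iv describing the biv "i".  */
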
/* Record that BIV, together with its predecessor and successor, is used
   in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
	 *&x[0], which is not folded and does not trigger the
	 ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}

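/* For example (an illustrative sketch, not from the original source):
   with TOP = "i * 12 + i * 4" and BOT = "i", the PLUS_EXPR case yields
   the partial multiples 12 and 4, so *MUL becomes 16 and true is
   returned; with TOP = "i * 12 + j" and BOT = "i" the recursion fails
   on the "j" operand and false is returned.  */
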
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

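/* A worked example of the step check above (a sketch, not from the
   original source): for a reference requiring 64-bit alignment and
   STEP = 4, tree_ctz (step) is 2 and (1U << 2) * BITS_PER_UNIT is
   32 < 64 bits, so the address may drift off the required alignment
   after some iterations and true is returned; STEP = 8 passes.  */
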
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use of that one; otherwise
   it records it as a normal use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}

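/* For example (an illustrative sketch, not from the original source):
   the address uses "a[i]", "a[i + 1]" and "a[i + 2]" share the same
   base object, step and stripped base, so the first one is recorded as
   a normal use and the other two become its sub uses with constant
   offsets 4 and 8 (assuming 4-byte elements), kept sorted by offset.  */
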
2116 /* Finds addresses in *OP_P inside STMT. */
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2136 base = unshare_expr (base);
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2173 if (astep)
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2186 else
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2201 /* Moreover, on strict-alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2206 base = build_fold_addr_expr (base);
2208 /* Substituting bases of IVs into the base expression might
2209 have exposed folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
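/* A sketch of the TARGET_MEM_REF case above: when both the base and the
   index are induction variables, their steps combine as
     step = base_step + TMR_STEP * index_step,
   so e.g. base_step 0, TMR_STEP 4 and index_step 1 give an address iv
   advancing by 4 bytes per iteration.  */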
2234 /* Finds and records invariants used in STMT. */
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2261 find_invariants_stmt (data, stmt);
2263 if (gimple_code (stmt) == GIMPLE_COND)
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2269 if (is_gimple_assign (stmt))
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2279 iv = get_iv (data, *lhs);
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2305 /* TODO -- we should also handle address uses of type
2307 memory = call (whatever);
2311 call (memory). */
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2317 iv = get_iv (data, PHI_RESULT (stmt));
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2325 op = USE_FROM_PTR (use_p);
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2334 find_interesting_uses_op (data, op);
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2357 /* Finds uses of the induction variables that are interesting. */
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2374 edge_iterator ei;
2375 bb = body[i];
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2391 bitmap_iterator bi;
2393 fprintf (dump_file, "\n");
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2407 fprintf (dump_file, "\n");
2410 free (body);
2413 /* Compute the maximum offset of the [base + offset] addressing mode
2414 for the memory reference represented by USE. */
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2451 for (i = width; i > 0; i--)
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2469 if (i == 0)
2470 off = 0;
2472 max_offset_list[list_index] = off;
2473 return off;
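/* For illustration, on a hypothetical target whose [base + offset] form
   accepts a 12-bit unsigned displacement: probing offsets of the form
   2^i - 1 from the widest bit downwards first succeeds at i == 12, so
   4095 is cached in max_offset_list and returned for this mode and
   address space.  */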
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2479 1) At least one group contains two uses with different offsets.
2480 2) No group contains more than two different offsets.
2482 Return false otherwise. We want to split such groups because:
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) The problem size with only small groups is usually small, and
2487 the general algorithm can handle it well.
2489 TODO -- The above claim may not hold when auto increment is supported. */
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2512 if (distinct > 2)
2513 return false;
2515 if (distinct == 2)
2516 split_p = true;
2519 return split_p;
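/* For illustration, with sub uses sorted by offset: a group with offsets
   {0, 4} has distinct == 2 and asks for splitting, a group with offsets
   {0, 0} has distinct == 1 and is ignored, and as soon as some group has
   offsets {0, 4, 8} (distinct == 3), the function returns false and the
   groups are left to the general algorithm.  */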
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by the target's
2524 [base + offset] addressing mode. */
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2549 while (use)
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part of the addressing mode can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2564 /* Break the list and create new group. */
2565 if (use)
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2593 STRIP_NOPS (expr);
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2599 switch (code)
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2635 return fold_convert (orig_type, expr);
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2653 return fold_convert (orig_type, expr);
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2669 if (top_compref
2670 && integer_zerop (op1))
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2678 break;
2680 case COMPONENT_REF:
2682 tree field;
2684 if (!inside_addr)
2685 return orig_expr;
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2693 HOST_WIDE_INT boffset, abs_off;
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2707 break;
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2725 default:
2726 return orig_expr;
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2744 /* Inside an address, we might strip the top-level component references,
2745 thus changing the type of the expression. The handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2749 return expr;
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
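/* A worked example for strip_offset, assuming 4-byte integers: for
   &a[i + 3], the ARRAY_REF case multiplies the offset 3 stripped from
   the index by the element size 4, so the core &a[i] is returned and
   *OFFSET is set to 12.  The bare reference a[i + 3] outside an address
   is returned unchanged, since the ARRAY_REF case requires
   INSIDE_ADDR.  */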
2763 /* Returns a variant of TYPE that can be used as a base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
2767 static tree
2768 generic_type_for (tree type)
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2776 return unsigned_type_for (type);
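/* For example, generic_type_for maps int to unsigned int, returns
   unsigned long unchanged, and maps a pointer type to the unsigned
   integer type of the same precision, so that candidate arithmetic
   wraps instead of invoking undefined behavior on overflow.  */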
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2800 return NULL_TREE;
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2818 /* For non-original variables, make sure their values are computed in a type
2819 that does not invoke undefined behavior on overflows (since in general,
2820 we cannot prove that these induction variables are non-wrapping). */
2821 if (pos != IP_ORIGINAL)
2823 orig_type = TREE_TYPE (base);
2824 type = generic_type_for (orig_type);
2825 if (type != orig_type)
2827 base = fold_convert (type, base);
2828 step = fold_convert (type, step);
2832 for (i = 0; i < n_iv_cands (data); i++)
2834 cand = iv_cand (data, i);
2836 if (cand->pos != pos)
2837 continue;
2839 if (cand->incremented_at != incremented_at
2840 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2841 && cand->ainc_use != use))
2842 continue;
2844 if (!cand->iv)
2846 if (!base && !step)
2847 break;
2849 continue;
2852 if (!base && !step)
2853 continue;
2855 if (operand_equal_p (base, cand->iv->base, 0)
2856 && operand_equal_p (step, cand->iv->step, 0)
2857 && (TYPE_PRECISION (TREE_TYPE (base))
2858 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2859 break;
2862 if (i == n_iv_cands (data))
2864 cand = XCNEW (struct iv_cand);
2865 cand->id = i;
2867 if (!base && !step)
2868 cand->iv = NULL;
2869 else
2870 cand->iv = alloc_iv (data, base, step);
2872 cand->pos = pos;
2873 if (pos != IP_ORIGINAL && cand->iv)
2875 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2876 cand->var_after = cand->var_before;
2878 cand->important = important;
2879 cand->incremented_at = incremented_at;
2880 data->iv_candidates.safe_push (cand);
2882 if (step
2883 && TREE_CODE (step) != INTEGER_CST)
2885 fd_ivopts_data = data;
2886 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2889 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2890 cand->ainc_use = use;
2891 else
2892 cand->ainc_use = NULL;
2894 cand->orig_iv = orig_iv;
2895 if (dump_file && (dump_flags & TDF_DETAILS))
2896 dump_cand (dump_file, cand);
2899 if (important && !cand->important)
2901 cand->important = true;
2902 if (dump_file && (dump_flags & TDF_DETAILS))
2903 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2906 if (use)
2908 bitmap_set_bit (use->related_cands, i);
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "Candidate %d is related to use %d\n",
2911 cand->id, use->id);
2914 return cand;
2917 /* Returns true if incrementing the induction variable at the end of the LOOP
2918 is allowed.
2920 The purpose is to avoid splitting the latch edge with a biv increment,
2921 thus creating a jump, possibly confusing other optimization passes and
2922 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2923 IP_NORMAL_POS is not available (so we do not have a better alternative),
2924 or if the latch edge is already nonempty. */
2926 static bool
2927 allow_ip_end_pos_p (struct loop *loop)
2929 if (!ip_normal_pos (loop))
2930 return true;
2932 if (!empty_block_p (ip_end_pos (loop)))
2933 return true;
2935 return false;
2938 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2939 Important field is set to IMPORTANT. */
2941 static void
2942 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2943 bool important, struct iv_use *use)
2945 basic_block use_bb = gimple_bb (use->stmt);
2946 machine_mode mem_mode;
2947 unsigned HOST_WIDE_INT cstepi;
2949 /* If we insert the increment in any position other than the standard
2950 ones, we must ensure that it is incremented once per iteration.
2951 It must not be in an inner nested loop, or one side of an if
2952 statement. */
2953 if (use_bb->loop_father != data->current_loop
2954 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2955 || stmt_could_throw_p (use->stmt)
2956 || !cst_and_fits_in_hwi (step))
2957 return;
2959 cstepi = int_cst_value (step);
2961 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2962 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2963 || USE_STORE_PRE_INCREMENT (mem_mode))
2964 && GET_MODE_SIZE (mem_mode) == cstepi)
2965 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2966 || USE_STORE_PRE_DECREMENT (mem_mode))
2967 && GET_MODE_SIZE (mem_mode) == -cstepi))
2969 enum tree_code code = MINUS_EXPR;
2970 tree new_base;
2971 tree new_step = step;
2973 if (POINTER_TYPE_P (TREE_TYPE (base)))
2975 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2976 code = POINTER_PLUS_EXPR;
2978 else
2979 new_step = fold_convert (TREE_TYPE (base), new_step);
2980 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2981 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2982 use->stmt);
2984 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2985 || USE_STORE_POST_INCREMENT (mem_mode))
2986 && GET_MODE_SIZE (mem_mode) == cstepi)
2987 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2988 || USE_STORE_POST_DECREMENT (mem_mode))
2989 && GET_MODE_SIZE (mem_mode) == -cstepi))
2991 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2992 use->stmt);
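/* A sketch of the pre-increment case above, assuming a 4-byte MEM_MODE
   and step 4: the IP_BEFORE_USE candidate is built with
     new_base = base - 4,
   so the increment executed just before the use reproduces the original
   address, matching *(++p); the IP_AFTER_USE candidate keeps BASE
   unchanged, matching *(p++).  */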
2996 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2997 position to POS. If USE is not NULL, the candidate is set as related to
2998 it. The candidate computation is scheduled both before the exit
2999 condition and at the end of the loop. */
3001 static void
3002 add_candidate (struct ivopts_data *data,
3003 tree base, tree step, bool important, struct iv_use *use,
3004 struct iv *orig_iv = NULL)
3006 gcc_assert (use == NULL || use->sub_id == 0);
3008 if (ip_normal_pos (data->current_loop))
3009 add_candidate_1 (data, base, step, important,
3010 IP_NORMAL, use, NULL, orig_iv);
3011 if (ip_end_pos (data->current_loop)
3012 && allow_ip_end_pos_p (data->current_loop))
3013 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3016 /* Adds standard iv candidates. */
3018 static void
3019 add_standard_iv_candidates (struct ivopts_data *data)
3021 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3023 /* The same for the double-width long type if it is still fast enough. */
3024 if (TYPE_PRECISION
3025 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3026 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3027 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3028 build_int_cst (long_integer_type_node, 1), true, NULL);
3030 /* And again for the double-width long long type if it is still fast enough. */
3031 if (TYPE_PRECISION
3032 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3033 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3034 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3035 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3039 /* Adds candidates based on the old induction variable IV. */
3041 static void
3042 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3044 gimple *phi;
3045 tree def;
3046 struct iv_cand *cand;
3048 /* Check if this biv is used in address type use. */
3049 if (iv->no_overflow && iv->have_address_use
3050 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3051 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3053 tree base = fold_convert (sizetype, iv->base);
3054 tree step = fold_convert (sizetype, iv->step);
3056 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3057 add_candidate (data, base, step, true, NULL, iv);
3058 /* Add iv cand of the original type only if it has nonlinear use. */
3059 if (iv->have_use_for)
3060 add_candidate (data, iv->base, iv->step, true, NULL);
3062 else
3063 add_candidate (data, iv->base, iv->step, true, NULL);
3065 /* The same, but with initial value zero. */
3066 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3067 add_candidate (data, size_int (0), iv->step, true, NULL);
3068 else
3069 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3070 iv->step, true, NULL);
3072 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3073 if (gimple_code (phi) == GIMPLE_PHI)
3075 /* Additionally record the possibility of leaving the original iv
3076 untouched. */
3077 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3078 /* Don't add the candidate if it comes from another PHI node, because
3079 then it is an affine iv appearing in the form of PEELED_CHREC. */
3080 phi = SSA_NAME_DEF_STMT (def);
3081 if (gimple_code (phi) != GIMPLE_PHI)
3083 cand = add_candidate_1 (data,
3084 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3085 SSA_NAME_DEF_STMT (def));
3086 cand->var_before = iv->ssa_name;
3087 cand->var_after = def;
3089 else
3090 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3094 /* Adds candidates based on the old induction variables. */
3096 static void
3097 add_iv_candidate_for_bivs (struct ivopts_data *data)
3099 unsigned i;
3100 struct iv *iv;
3101 bitmap_iterator bi;
3103 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3105 iv = ver_info (data, i)->iv;
3106 if (iv && iv->biv_p && !integer_zerop (iv->step))
3107 add_iv_candidate_for_biv (data, iv);
3111 /* Record the common candidate {BASE, STEP} derived from USE in the hash table. */
3113 static void
3114 record_common_cand (struct ivopts_data *data, tree base,
3115 tree step, struct iv_use *use)
3117 struct iv_common_cand ent;
3118 struct iv_common_cand **slot;
3120 gcc_assert (use != NULL);
3122 ent.base = base;
3123 ent.step = step;
3124 ent.hash = iterative_hash_expr (base, 0);
3125 ent.hash = iterative_hash_expr (step, ent.hash);
3127 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3128 if (*slot == NULL)
3130 *slot = new iv_common_cand ();
3131 (*slot)->base = base;
3132 (*slot)->step = step;
3133 (*slot)->uses.create (8);
3134 (*slot)->hash = ent.hash;
3135 data->iv_common_cands.safe_push ((*slot));
3137 (*slot)->uses.safe_push (use);
3138 return;
3141 /* Comparison function used to sort common candidates by decreasing number of uses. */
3143 static int
3144 common_cand_cmp (const void *p1, const void *p2)
3146 unsigned n1, n2;
3147 const struct iv_common_cand *const *const ccand1
3148 = (const struct iv_common_cand *const *)p1;
3149 const struct iv_common_cand *const *const ccand2
3150 = (const struct iv_common_cand *const *)p2;
3152 n1 = (*ccand1)->uses.length ();
3153 n2 = (*ccand2)->uses.length ();
3154 return n2 - n1;
3157 /* Adds IV candidates based on the common candidates recorded. */
3159 static void
3160 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3162 unsigned i, j;
3163 struct iv_cand *cand_1, *cand_2;
3165 data->iv_common_cands.qsort (common_cand_cmp);
3166 for (i = 0; i < data->iv_common_cands.length (); i++)
3168 struct iv_common_cand *ptr = data->iv_common_cands[i];
3170 /* Only add IV candidate if it's derived from multiple uses. */
3171 if (ptr->uses.length () <= 1)
3172 break;
3174 cand_1 = NULL;
3175 cand_2 = NULL;
3176 if (ip_normal_pos (data->current_loop))
3177 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3178 false, IP_NORMAL, NULL, NULL);
3180 if (ip_end_pos (data->current_loop)
3181 && allow_ip_end_pos_p (data->current_loop))
3182 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3183 false, IP_END, NULL, NULL);
3185 /* Bind deriving uses and the new candidates. */
3186 for (j = 0; j < ptr->uses.length (); j++)
3188 struct iv_use *use = ptr->uses[j];
3189 if (cand_1)
3190 bitmap_set_bit (use->related_cands, cand_1->id);
3191 if (cand_2)
3192 bitmap_set_bit (use->related_cands, cand_2->id);
3196 /* Release data since it is useless from this point. */
3197 data->iv_common_cand_tab->empty ();
3198 data->iv_common_cands.truncate (0);
3201 /* Adds candidates based on the value of USE's iv. */
3203 static void
3204 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3206 unsigned HOST_WIDE_INT offset;
3207 tree base;
3208 tree basetype;
3209 struct iv *iv = use->iv;
3211 add_candidate (data, iv->base, iv->step, false, use);
3213 /* Record common candidate for use in case it can be shared by others. */
3214 record_common_cand (data, iv->base, iv->step, use);
3216 /* Record common candidate with initial value zero. */
3217 basetype = TREE_TYPE (iv->base);
3218 if (POINTER_TYPE_P (basetype))
3219 basetype = sizetype;
3220 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3222 /* Record common candidate with constant offset stripped in base. */
3224 base = strip_offset (iv->base, &offset);
3225 if (offset || base != iv->base)
3226 record_common_cand (data, base, iv->step, use);
3229 /* Record common candidate with base_object removed in base. */
3230 if (iv->base_object != NULL)
3232 unsigned i;
3233 aff_tree aff_base;
3234 tree step, base_object = iv->base_object;
3236 base = iv->base;
3237 step = iv->step;
3238 STRIP_NOPS (base);
3239 STRIP_NOPS (step);
3240 STRIP_NOPS (base_object);
3241 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3242 for (i = 0; i < aff_base.n; i++)
3244 if (aff_base.elts[i].coef != 1)
3245 continue;
3247 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3248 break;
3250 if (i < aff_base.n)
3252 aff_combination_remove_elt (&aff_base, i);
3253 base = aff_combination_to_tree (&aff_base);
3254 basetype = TREE_TYPE (base);
3255 if (POINTER_TYPE_P (basetype))
3256 basetype = sizetype;
3258 step = fold_convert (basetype, step);
3259 record_common_cand (data, base, step, use);
3260 /* Also record common candidate with offset stripped. */
3261 base = strip_offset (base, &offset);
3262 if (offset)
3263 record_common_cand (data, base, step, use);
3267 /* Finally, add auto-increment candidates. Make such variables
3268 important, since other iv uses with the same base object may be
3269 based on them. */
3270 if (use != NULL && use->type == USE_ADDRESS)
3271 add_autoinc_candidates (data, iv->base, iv->step, true, use);
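/* For illustration, for a use on p + 16 + 4 * i with base object p, the
   candidates recorded above are roughly: the iv itself {p + 16 + 4i, 4},
   the zero-based {0, 4}, the offset-stripped {p + 4i, 4} and, with the
   base object removed, {16 + 4i, 4} and {4i, 4}, so that uses differing
   only in p or in a constant offset can share one candidate.  */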
3274 /* Adds candidates based on the uses. */
3276 static void
3277 add_iv_candidate_for_uses (struct ivopts_data *data)
3279 unsigned i;
3281 for (i = 0; i < n_iv_uses (data); i++)
3283 struct iv_use *use = iv_use (data, i);
3285 if (!use)
3286 continue;
3288 switch (use->type)
3290 case USE_NONLINEAR_EXPR:
3291 case USE_COMPARE:
3292 case USE_ADDRESS:
3293 /* Just add the ivs based on the value of the iv used here. */
3294 add_iv_candidate_for_use (data, use);
3295 break;
3297 default:
3298 gcc_unreachable ();
3301 add_iv_candidate_derived_from_uses (data);
3304 /* Record important candidates and add them to related_cands bitmaps. */
3306 static void
3307 record_important_candidates (struct ivopts_data *data)
3309 unsigned i;
3310 struct iv_use *use;
3312 for (i = 0; i < n_iv_cands (data); i++)
3314 struct iv_cand *cand = iv_cand (data, i);
3316 if (cand->important)
3317 bitmap_set_bit (data->important_candidates, i);
3320 data->consider_all_candidates = (n_iv_cands (data)
3321 <= CONSIDER_ALL_CANDIDATES_BOUND);
3323 /* Add important candidates to uses' related_cands bitmaps. */
3324 for (i = 0; i < n_iv_uses (data); i++)
3326 use = iv_use (data, i);
3327 bitmap_ior_into (use->related_cands, data->important_candidates);
3331 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3332 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3333 we allocate a simple list for every use. */
3335 static void
3336 alloc_use_cost_map (struct ivopts_data *data)
3338 unsigned i, size, s;
3340 for (i = 0; i < n_iv_uses (data); i++)
3342 struct iv_use *use = iv_use (data, i);
3344 if (data->consider_all_candidates)
3345 size = n_iv_cands (data);
3346 else
3348 s = bitmap_count_bits (use->related_cands);
3350 /* Round up to a power of two, so that computing the modulo by it is fast. */
3351 size = s ? (1 << ceil_log2 (s)) : 1;
3354 use->n_map_members = size;
3355 use->cost_map = XCNEWVEC (struct cost_pair, size);
3359 /* Returns a description of the computation cost of an expression whose
3360 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3362 static comp_cost
3363 new_cost (unsigned runtime, unsigned complexity)
3365 comp_cost cost;
3367 cost.cost = runtime;
3368 cost.complexity = complexity;
3370 return cost;
3373 /* Returns true if COST is infinite. */
3375 static bool
3376 infinite_cost_p (comp_cost cost)
3378 return cost.cost == INFTY;
3381 /* Adds costs COST1 and COST2. */
3383 static comp_cost
3384 add_costs (comp_cost cost1, comp_cost cost2)
3386 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3387 return infinite_cost;
3389 cost1.cost += cost2.cost;
3390 cost1.complexity += cost2.complexity;
3392 return cost1;
3394 /* Subtracts cost COST2 from cost COST1. */
3396 static comp_cost
3397 sub_costs (comp_cost cost1, comp_cost cost2)
3399 cost1.cost -= cost2.cost;
3400 cost1.complexity -= cost2.complexity;
3402 return cost1;
3405 /* Returns a negative number if COST1 < COST2, a positive number if
3406 COST1 > COST2, and 0 if COST1 = COST2. */
3408 static int
3409 compare_costs (comp_cost cost1, comp_cost cost2)
3411 if (cost1.cost == cost2.cost)
3412 return cost1.complexity - cost2.complexity;
3414 return cost1.cost - cost2.cost;
3417 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3418 depends on the invariants DEPENDS_ON, that the value used in expressing it
3419 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
3421 static void
3422 set_use_iv_cost (struct ivopts_data *data,
3423 struct iv_use *use, struct iv_cand *cand,
3424 comp_cost cost, bitmap depends_on, tree value,
3425 enum tree_code comp, int inv_expr_id)
3427 unsigned i, s;
3429 if (infinite_cost_p (cost))
3431 BITMAP_FREE (depends_on);
3432 return;
3435 if (data->consider_all_candidates)
3437 use->cost_map[cand->id].cand = cand;
3438 use->cost_map[cand->id].cost = cost;
3439 use->cost_map[cand->id].depends_on = depends_on;
3440 use->cost_map[cand->id].value = value;
3441 use->cost_map[cand->id].comp = comp;
3442 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3443 return;
3446 /* n_map_members is a power of two, so this computes modulo. */
3447 s = cand->id & (use->n_map_members - 1);
3448 for (i = s; i < use->n_map_members; i++)
3449 if (!use->cost_map[i].cand)
3450 goto found;
3451 for (i = 0; i < s; i++)
3452 if (!use->cost_map[i].cand)
3453 goto found;
3455 gcc_unreachable ();
3457 found:
3458 use->cost_map[i].cand = cand;
3459 use->cost_map[i].cost = cost;
3460 use->cost_map[i].depends_on = depends_on;
3461 use->cost_map[i].value = value;
3462 use->cost_map[i].comp = comp;
3463 use->cost_map[i].inv_expr_id = inv_expr_id;
3466 /* Gets cost of (USE, CANDIDATE) pair. */
3468 static struct cost_pair *
3469 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3470 struct iv_cand *cand)
3472 unsigned i, s;
3473 struct cost_pair *ret;
3475 if (!cand)
3476 return NULL;
3478 if (data->consider_all_candidates)
3480 ret = use->cost_map + cand->id;
3481 if (!ret->cand)
3482 return NULL;
3484 return ret;
3487 /* n_map_members is a power of two, so this computes modulo. */
3488 s = cand->id & (use->n_map_members - 1);
3489 for (i = s; i < use->n_map_members; i++)
3490 if (use->cost_map[i].cand == cand)
3491 return use->cost_map + i;
3492 else if (use->cost_map[i].cand == NULL)
3493 return NULL;
3494 for (i = 0; i < s; i++)
3495 if (use->cost_map[i].cand == cand)
3496 return use->cost_map + i;
3497 else if (use->cost_map[i].cand == NULL)
3498 return NULL;
3500 return NULL;
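/* A sketch of the open-addressing scheme used by the two functions
   above, assuming a use with n_map_members == 4: a candidate with id 6
   starts at slot 6 & 3 == 2 and probes slots 2, 3, then wraps to 0 and
   1, stopping at the first empty slot.  Since infinite costs are never
   stored, a missing entry means the pair was deemed infeasible.  */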
3503 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3504 static rtx
3505 produce_memory_decl_rtl (tree obj, int *regno)
3507 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3508 machine_mode address_mode = targetm.addr_space.address_mode (as);
3509 rtx x;
3511 gcc_assert (obj);
3512 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3514 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3515 x = gen_rtx_SYMBOL_REF (address_mode, name);
3516 SET_SYMBOL_REF_DECL (x, obj);
3517 x = gen_rtx_MEM (DECL_MODE (obj), x);
3518 set_mem_addr_space (x, as);
3519 targetm.encode_section_info (obj, x, true);
3521 else
3523 x = gen_raw_REG (address_mode, (*regno)++);
3524 x = gen_rtx_MEM (DECL_MODE (obj), x);
3525 set_mem_addr_space (x, as);
3528 return x;
3531 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3532 walk_tree. DATA contains the actual fake register number. */
3534 static tree
3535 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3537 tree obj = NULL_TREE;
3538 rtx x = NULL_RTX;
3539 int *regno = (int *) data;
3541 switch (TREE_CODE (*expr_p))
3543 case ADDR_EXPR:
3544 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3545 handled_component_p (*expr_p);
3546 expr_p = &TREE_OPERAND (*expr_p, 0))
3547 continue;
3548 obj = *expr_p;
3549 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3550 x = produce_memory_decl_rtl (obj, regno);
3551 break;
3553 case SSA_NAME:
3554 *ws = 0;
3555 obj = SSA_NAME_VAR (*expr_p);
3556 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3557 if (!obj)
3558 return NULL_TREE;
3559 if (!DECL_RTL_SET_P (obj))
3560 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3561 break;
3563 case VAR_DECL:
3564 case PARM_DECL:
3565 case RESULT_DECL:
3566 *ws = 0;
3567 obj = *expr_p;
3569 if (DECL_RTL_SET_P (obj))
3570 break;
3572 if (DECL_MODE (obj) == BLKmode)
3573 x = produce_memory_decl_rtl (obj, regno);
3574 else
3575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3577 break;
3579 default:
3580 break;
3583 if (x)
3585 decl_rtl_to_reset.safe_push (obj);
3586 SET_DECL_RTL (obj, x);
3589 return NULL_TREE;
3592 /* Determines cost of the computation of EXPR. */
3594 static unsigned
3595 computation_cost (tree expr, bool speed)
3597 rtx_insn *seq;
3598 rtx rslt;
3599 tree type = TREE_TYPE (expr);
3600 unsigned cost;
3601 /* Avoid using hard regs in ways which may be unsupported. */
3602 int regno = LAST_VIRTUAL_REGISTER + 1;
3603 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3604 enum node_frequency real_frequency = node->frequency;
3606 node->frequency = NODE_FREQUENCY_NORMAL;
3607 crtl->maybe_hot_insn_p = speed;
3608 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3609 start_sequence ();
3610 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3611 seq = get_insns ();
3612 end_sequence ();
3613 default_rtl_profile ();
3614 node->frequency = real_frequency;
3616 cost = seq_cost (seq, speed);
3617 if (MEM_P (rslt))
3618 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3619 TYPE_ADDR_SPACE (type), speed);
3620 else if (!REG_P (rslt))
3621 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3623 return cost;
3626 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3628 static tree
3629 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3631 if (stmt_after_increment (loop, cand, stmt))
3632 return cand->var_after;
3633 else
3634 return cand->var_before;
3637 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3638 same precision, which is at least as wide as the precision of TYPE, stores
3639 BA to *A and BB to *B, and returns the type of BA. Otherwise, returns
3640 the type of *A and *B unchanged. */
3642 static tree
3643 determine_common_wider_type (tree *a, tree *b)
3645 tree wider_type = NULL;
3646 tree suba, subb;
3647 tree atype = TREE_TYPE (*a);
3649 if (CONVERT_EXPR_P (*a))
3651 suba = TREE_OPERAND (*a, 0);
3652 wider_type = TREE_TYPE (suba);
3653 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3654 return atype;
3656 else
3657 return atype;
3659 if (CONVERT_EXPR_P (*b))
3661 subb = TREE_OPERAND (*b, 0);
3662 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3663 return atype;
3665 else
3666 return atype;
3668 *a = suba;
3669 *b = subb;
3670 return wider_type;
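/* For example: if *A is (unsigned int) a64 and *B is (unsigned int) b64,
   where a64 and b64 both have type unsigned long, the conversions are
   peeled off, *A and *B are set to a64 and b64, and unsigned long is
   returned; if only one of the operands is such a conversion, the
   original type is kept.  */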
3673 /* Determines the expression by which USE is expressed from induction variable
3674 CAND at statement AT in LOOP. The expression is stored in a decomposed
3675 form into AFF. Returns false if USE cannot be expressed using CAND. */
3677 static bool
3678 get_computation_aff (struct loop *loop,
3679 struct iv_use *use, struct iv_cand *cand, gimple *at,
3680 struct aff_tree *aff)
3682 tree ubase = use->iv->base;
3683 tree ustep = use->iv->step;
3684 tree cbase = cand->iv->base;
3685 tree cstep = cand->iv->step, cstep_common;
3686 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3687 tree common_type, var;
3688 tree uutype;
3689 aff_tree cbase_aff, var_aff;
3690 widest_int rat;
3692 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3694 /* We do not have enough precision to express the values of the use. */
3695 return false;
3698 var = var_at_stmt (loop, cand, at);
3699 uutype = unsigned_type_for (utype);
3701 /* If the conversion is not a no-op, perform it. */
3702 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3704 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3705 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3707 tree inner_base, inner_step, inner_type;
3708 inner_base = TREE_OPERAND (cbase, 0);
3709 if (CONVERT_EXPR_P (cstep))
3710 inner_step = TREE_OPERAND (cstep, 0);
3711 else
3712 inner_step = cstep;
3714 inner_type = TREE_TYPE (inner_base);
3715 /* If the candidate is added from a biv whose type is smaller than
3716 ctype, we know both the candidate and the biv won't overflow.
3717 In this case, it's safe to skip the conversion in the candidate.
3718 As an example, (unsigned short)((unsigned long)A) equals
3719 (unsigned short)A, if A has a type no larger than short. */
3720 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3722 cbase = inner_base;
3723 cstep = inner_step;
3726 cstep = fold_convert (uutype, cstep);
3727 cbase = fold_convert (uutype, cbase);
3728 var = fold_convert (uutype, var);
3731 if (!constant_multiple_of (ustep, cstep, &rat))
3732 return false;
3734 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3735 type, we achieve better folding by computing their difference in this
3736 wider type, and casting the result to UUTYPE. We do not need to worry
3737 about overflows, as all the arithmetic will in the end be performed in
3738 UUTYPE anyway. */
3739 common_type = determine_common_wider_type (&ubase, &cbase);
3741 /* use = ubase - ratio * cbase + ratio * var. */
3742 tree_to_aff_combination (ubase, common_type, aff);
3743 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3744 tree_to_aff_combination (var, uutype, &var_aff);
3746 /* We need to shift the value if we are after the increment. */
3747 if (stmt_after_increment (loop, cand, at))
3749 aff_tree cstep_aff;
3751 if (common_type != uutype)
3752 cstep_common = fold_convert (common_type, cstep);
3753 else
3754 cstep_common = cstep;
3756 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3757 aff_combination_add (&cbase_aff, &cstep_aff);
3760 aff_combination_scale (&cbase_aff, -rat);
3761 aff_combination_add (aff, &cbase_aff);
3762 if (common_type != uutype)
3763 aff_combination_convert (aff, uutype);
3765 aff_combination_scale (&var_aff, rat);
3766 aff_combination_add (aff, &var_aff);
3768 return true;
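/* A small worked instance of use = ubase - ratio * cbase + ratio * var:
   for a use with base 16 and step 8 expressed by a candidate with base 0
   and step 4, constant_multiple_of yields ratio 2, and the resulting
   combination is 16 - 2 * 0 + 2 * var, i.e. the use is recomputed as
   2 * var + 16.  */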
3771 /* Return the type of USE. */
3773 static tree
3774 get_use_type (struct iv_use *use)
3776 tree base_type = TREE_TYPE (use->iv->base);
3777 tree type;
3779 if (use->type == USE_ADDRESS)
3781 /* The base_type may be a void pointer. Create a pointer type based on
3782 the mem_ref instead. */
3783 type = build_pointer_type (TREE_TYPE (*use->op_p));
3784 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3785 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3787 else
3788 type = base_type;
3790 return type;
3793 /* Determines the expression by which USE is expressed from induction variable
3794 CAND at statement AT in LOOP. The computation is unshared. */
3796 static tree
3797 get_computation_at (struct loop *loop,
3798 struct iv_use *use, struct iv_cand *cand, gimple *at)
3800 aff_tree aff;
3801 tree type = get_use_type (use);
3803 if (!get_computation_aff (loop, use, cand, at, &aff))
3804 return NULL_TREE;
3805 unshare_aff_combination (&aff);
3806 return fold_convert (type, aff_combination_to_tree (&aff));
3809 /* Determines the expression by which USE is expressed from induction variable
3810 CAND in LOOP. The computation is unshared. */
3812 static tree
3813 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3815 return get_computation_at (loop, use, cand, use->stmt);
3818 /* Adjust the cost COST for being in loop setup rather than loop body.
3819 If we're optimizing for space, the loop setup overhead is constant;
3820 if we're optimizing for speed, amortize it over the per-iteration cost. */
3821 static unsigned
3822 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3824 if (cost == INFTY)
3825 return cost;
3826 else if (optimize_loop_for_speed_p (data->current_loop))
3827 return cost / avg_loop_niter (data->current_loop);
3828 else
3829 return cost;
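/* For instance, a setup cost of 10 in a loop that iterates 5 times on
   average is charged as 10 / 5 == 2 by the function above when
   optimizing for speed, whereas for size it stays 10, since the setup
   code contributes its full size exactly once regardless of the
   iteration count.  */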
3832 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3833 validity for a memory reference accessing memory of mode MODE in
3834 address space AS. */
3837 bool
3838 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3839 addr_space_t as)
3841 #define MAX_RATIO 128
3842 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3843 static vec<sbitmap> valid_mult_list;
3844 sbitmap valid_mult;
3846 if (data_index >= valid_mult_list.length ())
3847 valid_mult_list.safe_grow_cleared (data_index + 1);
3849 valid_mult = valid_mult_list[data_index];
3850 if (!valid_mult)
3852 machine_mode address_mode = targetm.addr_space.address_mode (as);
3853 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3854 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3855 rtx addr, scaled;
3856 HOST_WIDE_INT i;
3858 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3859 bitmap_clear (valid_mult);
3860 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3861 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3862 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3864 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3865 if (memory_address_addr_space_p (mode, addr, as)
3866 || memory_address_addr_space_p (mode, scaled, as))
3867 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3870 if (dump_file && (dump_flags & TDF_DETAILS))
3872 fprintf (dump_file, " allowed multipliers:");
3873 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3874 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3875 fprintf (dump_file, " %d", (int) i);
3876 fprintf (dump_file, "\n");
3877 fprintf (dump_file, "\n");
3880 valid_mult_list[data_index] = valid_mult;
3883 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3884 return false;
3886 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
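/* For illustration: on x86, the scaled-index address forms typically
   accept the ratios 1, 2, 4 and 8, so the cached bitmap has exactly
   those bits set (biased by MAX_RATIO), and e.g. ratio 3 makes the
   function above return false for such a mode and address space.  */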
3889 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3890 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3891 variable is omitted. Compute the cost for a memory reference that accesses
3892 a memory location of mode MEM_MODE in address space AS.
3894 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3895 size of MEM_MODE / RATIO) is available. To make this determination, we
3896 look at the size of the increment to be made, which is given in CSTEP.
3897 CSTEP may be zero if the step is unknown.
3898 STMT_AFTER_INC is true iff the statement we're looking at is after the
3899 increment of the original biv.
3901 TODO -- there must be some better way. This all is quite crude. */
3903 enum ainc_type
3905 AINC_PRE_INC, /* Pre increment. */
3906 AINC_PRE_DEC, /* Pre decrement. */
3907 AINC_POST_INC, /* Post increment. */
3908 AINC_POST_DEC, /* Post decrement. */
3909 AINC_NONE /* Also the number of auto increment types. */
3912 struct address_cost_data
3914 HOST_WIDE_INT min_offset, max_offset;
3915 unsigned costs[2][2][2][2];
3916 unsigned ainc_costs[AINC_NONE];
3920 static comp_cost
3921 get_address_cost (bool symbol_present, bool var_present,
3922 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3923 HOST_WIDE_INT cstep, machine_mode mem_mode,
3924 addr_space_t as, bool speed,
3925 bool stmt_after_inc, bool *may_autoinc)
3927 machine_mode address_mode = targetm.addr_space.address_mode (as);
3928 static vec<address_cost_data *> address_cost_data_list;
3929 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3930 address_cost_data *data;
3931 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3932 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3933 unsigned cost, acost, complexity;
3934 enum ainc_type autoinc_type;
3935 bool offset_p, ratio_p, autoinc;
3936 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3937 unsigned HOST_WIDE_INT mask;
3938 unsigned bits;
3940 if (data_index >= address_cost_data_list.length ())
3941 address_cost_data_list.safe_grow_cleared (data_index + 1);
3943 data = address_cost_data_list[data_index];
3944 if (!data)
3946 HOST_WIDE_INT i;
3947 HOST_WIDE_INT rat, off = 0;
3948 int old_cse_not_expected, width;
3949 unsigned sym_p, var_p, off_p, rat_p, add_c;
3950 rtx_insn *seq;
3951 rtx addr, base;
3952 rtx reg0, reg1;
3954 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3956 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3958 width = GET_MODE_BITSIZE (address_mode) - 1;
3959 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3960 width = HOST_BITS_PER_WIDE_INT - 1;
3961 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3963 for (i = width; i >= 0; i--)
3965 off = -((unsigned HOST_WIDE_INT) 1 << i);
3966 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3967 if (memory_address_addr_space_p (mem_mode, addr, as))
3968 break;
3970 data->min_offset = (i == -1? 0 : off);
3972 for (i = width; i >= 0; i--)
3974 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3975 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3976 if (memory_address_addr_space_p (mem_mode, addr, as))
3977 break;
3978 /* For some strict-alignment targets, the offset must be naturally
3979 aligned. Try an aligned offset if mem_mode is not QImode. */
3980 off = mem_mode != QImode
3981 ? ((unsigned HOST_WIDE_INT) 1 << i)
3982 - GET_MODE_SIZE (mem_mode)
3983 : 0;
3984 if (off > 0)
3986 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3987 if (memory_address_addr_space_p (mem_mode, addr, as))
3988 break;
3991 if (i == -1)
3992 off = 0;
3993 data->max_offset = off;
3995 if (dump_file && (dump_flags & TDF_DETAILS))
3997 fprintf (dump_file, "get_address_cost:\n");
3998 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3999 GET_MODE_NAME (mem_mode),
4000 data->min_offset);
4001 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4002 GET_MODE_NAME (mem_mode),
4003 data->max_offset);
4006 rat = 1;
4007 for (i = 2; i <= MAX_RATIO; i++)
4008 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4010 rat = i;
4011 break;
4014 /* Compute the cost of various addressing modes. */
4015 acost = 0;
4016 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4017 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4019 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4020 || USE_STORE_PRE_DECREMENT (mem_mode))
4022 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4023 has_predec[mem_mode]
4024 = memory_address_addr_space_p (mem_mode, addr, as);
4026 if (has_predec[mem_mode])
4027 data->ainc_costs[AINC_PRE_DEC]
4028 = address_cost (addr, mem_mode, as, speed);
4030 if (USE_LOAD_POST_DECREMENT (mem_mode)
4031 || USE_STORE_POST_DECREMENT (mem_mode))
4033 addr = gen_rtx_POST_DEC (address_mode, reg0);
4034 has_postdec[mem_mode]
4035 = memory_address_addr_space_p (mem_mode, addr, as);
4037 if (has_postdec[mem_mode])
4038 data->ainc_costs[AINC_POST_DEC]
4039 = address_cost (addr, mem_mode, as, speed);
4041 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4042 || USE_STORE_PRE_INCREMENT (mem_mode))
4044 addr = gen_rtx_PRE_INC (address_mode, reg0);
4045 has_preinc[mem_mode]
4046 = memory_address_addr_space_p (mem_mode, addr, as);
4048 if (has_preinc[mem_mode])
4049 data->ainc_costs[AINC_PRE_INC]
4050 = address_cost (addr, mem_mode, as, speed);
4052 if (USE_LOAD_POST_INCREMENT (mem_mode)
4053 || USE_STORE_POST_INCREMENT (mem_mode))
4055 addr = gen_rtx_POST_INC (address_mode, reg0);
4056 has_postinc[mem_mode]
4057 = memory_address_addr_space_p (mem_mode, addr, as);
4059 if (has_postinc[mem_mode])
4060 data->ainc_costs[AINC_POST_INC]
4061 = address_cost (addr, mem_mode, as, speed);
4063 for (i = 0; i < 16; i++)
4065 sym_p = i & 1;
4066 var_p = (i >> 1) & 1;
4067 off_p = (i >> 2) & 1;
4068 rat_p = (i >> 3) & 1;
4070 addr = reg0;
4071 if (rat_p)
4072 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4073 gen_int_mode (rat, address_mode));
4075 if (var_p)
4076 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4078 if (sym_p)
4080 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4081 /* ??? We can run into trouble with some backends by presenting
4082 it with symbols which haven't been properly passed through
4083 targetm.encode_section_info. By setting the local bit, we
4084 enhance the probability of things working. */
4085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4087 if (off_p)
4088 base = gen_rtx_fmt_e (CONST, address_mode,
4089 gen_rtx_fmt_ee
4090 (PLUS, address_mode, base,
4091 gen_int_mode (off, address_mode)));
4093 else if (off_p)
4094 base = gen_int_mode (off, address_mode);
4095 else
4096 base = NULL_RTX;
4098 if (base)
4099 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4101 start_sequence ();
4102 /* To avoid splitting addressing modes, pretend that no cse will
4103 follow. */
4104 old_cse_not_expected = cse_not_expected;
4105 cse_not_expected = true;
4106 addr = memory_address_addr_space (mem_mode, addr, as);
4107 cse_not_expected = old_cse_not_expected;
4108 seq = get_insns ();
4109 end_sequence ();
4111 acost = seq_cost (seq, speed);
4112 acost += address_cost (addr, mem_mode, as, speed);
4114 if (!acost)
4115 acost = 1;
4116 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4119 /* On some targets, it is quite expensive to load a symbol into a register,
4120 which makes addresses that contain symbols look much more expensive.
4121 However, the symbol will have to be loaded in any case before the
4122 loop (and quite likely we have it in a register already), so it does not
4123 make much sense to penalize them too heavily. So make some final
4124 tweaks for the SYMBOL_PRESENT modes:
4126 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4127 var is cheaper, use this mode with small penalty.
4128 If VAR_PRESENT is true, try whether the mode with
4129 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4130 if this is the case, use it. */
4131 add_c = add_cost (speed, address_mode);
4132 for (i = 0; i < 8; i++)
4134 var_p = i & 1;
4135 off_p = (i >> 1) & 1;
4136 rat_p = (i >> 2) & 1;
4138 acost = data->costs[0][1][off_p][rat_p] + 1;
4139 if (var_p)
4140 acost += add_c;
4142 if (acost < data->costs[1][var_p][off_p][rat_p])
4143 data->costs[1][var_p][off_p][rat_p] = acost;
4146 if (dump_file && (dump_flags & TDF_DETAILS))
4148 fprintf (dump_file, "Address costs:\n");
4150 for (i = 0; i < 16; i++)
4152 sym_p = i & 1;
4153 var_p = (i >> 1) & 1;
4154 off_p = (i >> 2) & 1;
4155 rat_p = (i >> 3) & 1;
4157 fprintf (dump_file, " ");
4158 if (sym_p)
4159 fprintf (dump_file, "sym + ");
4160 if (var_p)
4161 fprintf (dump_file, "var + ");
4162 if (off_p)
4163 fprintf (dump_file, "cst + ");
4164 if (rat_p)
4165 fprintf (dump_file, "rat * ");
4167 acost = data->costs[sym_p][var_p][off_p][rat_p];
4168 fprintf (dump_file, "index costs %d\n", acost);
4170 if (has_predec[mem_mode] || has_postdec[mem_mode]
4171 || has_preinc[mem_mode] || has_postinc[mem_mode])
4172 fprintf (dump_file, " May include autoinc/dec\n");
4173 fprintf (dump_file, "\n");
4176 address_cost_data_list[data_index] = data;
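/* Reduce OFFSET modulo the width of the address mode and sign-extend the
   result, so that negative offsets are represented canonically in the
   HOST_WIDE_INT computations below.  */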
4179 bits = GET_MODE_BITSIZE (address_mode);
4180 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4181 offset &= mask;
4182 if ((offset >> (bits - 1) & 1))
4183 offset |= ~mask;
4184 s_offset = offset;
4186 autoinc = false;
4187 autoinc_type = AINC_NONE;
4188 msize = GET_MODE_SIZE (mem_mode);
4189 autoinc_offset = offset;
4190 if (stmt_after_inc)
4191 autoinc_offset += ratio * cstep;
4192 if (symbol_present || var_present || ratio != 1)
4193 autoinc = false;
4194 else
4196 if (has_postinc[mem_mode] && autoinc_offset == 0
4197 && msize == cstep)
4198 autoinc_type = AINC_POST_INC;
4199 else if (has_postdec[mem_mode] && autoinc_offset == 0
4200 && msize == -cstep)
4201 autoinc_type = AINC_POST_DEC;
4202 else if (has_preinc[mem_mode] && autoinc_offset == msize
4203 && msize == cstep)
4204 autoinc_type = AINC_PRE_INC;
4205 else if (has_predec[mem_mode] && autoinc_offset == -msize
4206 && msize == -cstep)
4207 autoinc_type = AINC_PRE_DEC;
4209 if (autoinc_type != AINC_NONE)
4210 autoinc = true;
4213 cost = 0;
4214 offset_p = (s_offset != 0
4215 && data->min_offset <= s_offset
4216 && s_offset <= data->max_offset);
4217 ratio_p = (ratio != 1
4218 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4220 if (ratio != 1 && !ratio_p)
4221 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4223 if (s_offset && !offset_p && !symbol_present)
4224 cost += add_cost (speed, address_mode);
4226 if (may_autoinc)
4227 *may_autoinc = autoinc;
4228 if (autoinc)
4229 acost = data->ainc_costs[autoinc_type];
4230 else
4231 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4232 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4233 return new_cost (cost + acost, complexity);
4236 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4237 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4238 calculating the operands of EXPR. Returns true if successful, and returns
4239 the cost in COST. */
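/* For example, for EXPR == a + b * 4 the constant gives m == 2, so the
   alternatives compared below are a shift of b by 2 followed by an add
   (AS_COST) versus a single shift-and-add instruction (SA_COST), when the
   target provides one.  For a MINUS_EXPR, which operand holds the
   multiplication decides between shiftsub0 and shiftsub1.  */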
4241 static bool
4242 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4243 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4245 comp_cost res;
4246 tree op1 = TREE_OPERAND (expr, 1);
4247 tree cst = TREE_OPERAND (mult, 1);
4248 tree multop = TREE_OPERAND (mult, 0);
4249 int m = exact_log2 (int_cst_value (cst));
4250 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4251 int as_cost, sa_cost;
4252 bool mult_in_op1;
4254 if (!(m >= 0 && m < maxm))
4255 return false;
4257 STRIP_NOPS (op1);
4258 mult_in_op1 = operand_equal_p (op1, mult, 0);
4260 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4262 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4263 use that in preference to a shift insn followed by an add insn. */
4264 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4265 ? shiftadd_cost (speed, mode, m)
4266 : (mult_in_op1
4267 ? shiftsub1_cost (speed, mode, m)
4268 : shiftsub0_cost (speed, mode, m)));
4270 res = new_cost (MIN (as_cost, sa_cost), 0);
4271 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4273 STRIP_NOPS (multop);
4274 if (!is_gimple_val (multop))
4275 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4277 *cost = res;
4278 return true;
4281 /* Estimates cost of forcing expression EXPR into a variable. */
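/* The first invocation measures, once per compilation, the cost of three
   representative expressions -- an integer constant, the address of a
   symbol, and a symbol address plus a constant offset -- for both the
   size (i == 0) and speed (i == 1) metrics.  These serve as building
   blocks for the estimates below.  */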
4283 static comp_cost
4284 force_expr_to_var_cost (tree expr, bool speed)
4286 static bool costs_initialized = false;
4287 static unsigned integer_cost [2];
4288 static unsigned symbol_cost [2];
4289 static unsigned address_cost [2];
4290 tree op0, op1;
4291 comp_cost cost0, cost1, cost;
4292 machine_mode mode;
4294 if (!costs_initialized)
4296 tree type = build_pointer_type (integer_type_node);
4297 tree var, addr;
4298 rtx x;
4299 int i;
4301 var = create_tmp_var_raw (integer_type_node, "test_var");
4302 TREE_STATIC (var) = 1;
4303 x = produce_memory_decl_rtl (var, NULL);
4304 SET_DECL_RTL (var, x);
4306 addr = build1 (ADDR_EXPR, type, var);
4309 for (i = 0; i < 2; i++)
4311 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4312 2000), i);
4314 symbol_cost[i] = computation_cost (addr, i) + 1;
4316 address_cost[i]
4317 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4318 if (dump_file && (dump_flags & TDF_DETAILS))
4320 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4321 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4322 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4323 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4324 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4325 fprintf (dump_file, "\n");
4329 costs_initialized = true;
4332 STRIP_NOPS (expr);
4334 if (SSA_VAR_P (expr))
4335 return no_cost;
4337 if (is_gimple_min_invariant (expr))
4339 if (TREE_CODE (expr) == INTEGER_CST)
4340 return new_cost (integer_cost [speed], 0);
4342 if (TREE_CODE (expr) == ADDR_EXPR)
4344 tree obj = TREE_OPERAND (expr, 0);
4346 if (TREE_CODE (obj) == VAR_DECL
4347 || TREE_CODE (obj) == PARM_DECL
4348 || TREE_CODE (obj) == RESULT_DECL)
4349 return new_cost (symbol_cost [speed], 0);
4352 return new_cost (address_cost [speed], 0);
4355 switch (TREE_CODE (expr))
4357 case POINTER_PLUS_EXPR:
4358 case PLUS_EXPR:
4359 case MINUS_EXPR:
4360 case MULT_EXPR:
4361 op0 = TREE_OPERAND (expr, 0);
4362 op1 = TREE_OPERAND (expr, 1);
4363 STRIP_NOPS (op0);
4364 STRIP_NOPS (op1);
4365 break;
4367 CASE_CONVERT:
4368 case NEGATE_EXPR:
4369 op0 = TREE_OPERAND (expr, 0);
4370 STRIP_NOPS (op0);
4371 op1 = NULL_TREE;
4372 break;
4374 default:
4375 /* Just an arbitrary value, FIXME. */
4376 return new_cost (target_spill_cost[speed], 0);
4379 if (op0 == NULL_TREE
4380 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4381 cost0 = no_cost;
4382 else
4383 cost0 = force_expr_to_var_cost (op0, speed);
4385 if (op1 == NULL_TREE
4386 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4387 cost1 = no_cost;
4388 else
4389 cost1 = force_expr_to_var_cost (op1, speed);
4391 mode = TYPE_MODE (TREE_TYPE (expr));
4392 switch (TREE_CODE (expr))
4394 case POINTER_PLUS_EXPR:
4395 case PLUS_EXPR:
4396 case MINUS_EXPR:
4397 case NEGATE_EXPR:
4398 cost = new_cost (add_cost (speed, mode), 0);
4399 if (TREE_CODE (expr) != NEGATE_EXPR)
4401 tree mult = NULL_TREE;
4402 comp_cost sa_cost;
4403 if (TREE_CODE (op1) == MULT_EXPR)
4404 mult = op1;
4405 else if (TREE_CODE (op0) == MULT_EXPR)
4406 mult = op0;
4408 if (mult != NULL_TREE
4409 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4410 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4411 speed, &sa_cost))
4412 return sa_cost;
4414 break;
4416 CASE_CONVERT:
4418 tree inner_mode, outer_mode;
4419 outer_mode = TREE_TYPE (expr);
4420 inner_mode = TREE_TYPE (op0);
4421 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4422 TYPE_MODE (inner_mode), speed), 0);
4424 break;
4426 case MULT_EXPR:
4427 if (cst_and_fits_in_hwi (op0))
4428 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4429 mode, speed), 0);
4430 else if (cst_and_fits_in_hwi (op1))
4431 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4432 mode, speed), 0);
4433 else
4434 return new_cost (target_spill_cost [speed], 0);
4435 break;
4437 default:
4438 gcc_unreachable ();
4441 cost = add_costs (cost, cost0);
4442 cost = add_costs (cost, cost1);
4444 /* Bound the cost by target_spill_cost. The parts of complicated
4445 computations often are either loop invariant or at least can
4446 be shared between several iv uses, so letting this grow without
4447 limits would not give reasonable results. */
4448 if (cost.cost > (int) target_spill_cost [speed])
4449 cost.cost = target_spill_cost [speed];
4451 return cost;
4454 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4455 invariants the computation depends on. */
4457 static comp_cost
4458 force_var_cost (struct ivopts_data *data,
4459 tree expr, bitmap *depends_on)
4461 if (depends_on)
4463 fd_ivopts_data = data;
4464 walk_tree (&expr, find_depends, depends_on, NULL);
4467 return force_expr_to_var_cost (expr, data->speed);
4470 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4471 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4472 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4473 invariants the computation depends on. */
4475 static comp_cost
4476 split_address_cost (struct ivopts_data *data,
4477 tree addr, bool *symbol_present, bool *var_present,
4478 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4480 tree core;
4481 HOST_WIDE_INT bitsize;
4482 HOST_WIDE_INT bitpos;
4483 tree toffset;
4484 machine_mode mode;
4485 int unsignedp, reversep, volatilep;
4487 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4488 &unsignedp, &reversep, &volatilep, false);
4490 if (toffset != 0
4491 || bitpos % BITS_PER_UNIT != 0
4492 || reversep
4493 || TREE_CODE (core) != VAR_DECL)
4495 *symbol_present = false;
4496 *var_present = true;
4497 fd_ivopts_data = data;
4498 if (depends_on)
4499 walk_tree (&addr, find_depends, depends_on, NULL);
4501 return new_cost (target_spill_cost[data->speed], 0);
4504 *offset += bitpos / BITS_PER_UNIT;
4505 if (TREE_STATIC (core)
4506 || DECL_EXTERNAL (core))
4508 *symbol_present = true;
4509 *var_present = false;
4510 return no_cost;
4513 *symbol_present = false;
4514 *var_present = true;
4515 return no_cost;
4518 /* Estimates cost of expressing difference of addresses E1 - E2 as
4519 var + symbol + offset. The value of offset is added to OFFSET,
4520 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4521 part is missing. DEPENDS_ON is a set of the invariants the computation
4522 depends on. */
4524 static comp_cost
4525 ptr_difference_cost (struct ivopts_data *data,
4526 tree e1, tree e2, bool *symbol_present, bool *var_present,
4527 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4529 HOST_WIDE_INT diff = 0;
4530 aff_tree aff_e1, aff_e2;
4531 tree type;
4533 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4535 if (ptr_difference_const (e1, e2, &diff))
4537 *offset += diff;
4538 *symbol_present = false;
4539 *var_present = false;
4540 return no_cost;
4543 if (integer_zerop (e2))
4544 return split_address_cost (data, TREE_OPERAND (e1, 0),
4545 symbol_present, var_present, offset, depends_on);
4547 *symbol_present = false;
4548 *var_present = true;
4550 type = signed_type_for (TREE_TYPE (e1));
4551 tree_to_aff_combination (e1, type, &aff_e1);
4552 tree_to_aff_combination (e2, type, &aff_e2);
4553 aff_combination_scale (&aff_e2, -1);
4554 aff_combination_add (&aff_e1, &aff_e2);
4556 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4559 /* Estimates cost of expressing difference E1 - E2 as
4560 var + symbol + offset. The value of offset is added to OFFSET,
4561 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4562 part is missing. DEPENDS_ON is a set of the invariants the computation
4563 depends on. */
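/* For instance, for E1 == &a[4] and E2 == &a[0] the whole difference is
   folded into OFFSET and the cost is zero, whereas for two unrelated
   pointers the difference has to be computed into a register before the
   loop, which is what force_var_cost accounts for.  */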
4565 static comp_cost
4566 difference_cost (struct ivopts_data *data,
4567 tree e1, tree e2, bool *symbol_present, bool *var_present,
4568 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4570 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4571 unsigned HOST_WIDE_INT off1, off2;
4572 aff_tree aff_e1, aff_e2;
4573 tree type;
4575 e1 = strip_offset (e1, &off1);
4576 e2 = strip_offset (e2, &off2);
4577 *offset += off1 - off2;
4579 STRIP_NOPS (e1);
4580 STRIP_NOPS (e2);
4582 if (TREE_CODE (e1) == ADDR_EXPR)
4583 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4584 offset, depends_on);
4585 *symbol_present = false;
4587 if (operand_equal_p (e1, e2, 0))
4589 *var_present = false;
4590 return no_cost;
4593 *var_present = true;
4595 if (integer_zerop (e2))
4596 return force_var_cost (data, e1, depends_on);
4598 if (integer_zerop (e1))
4600 comp_cost cost = force_var_cost (data, e2, depends_on);
4601 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4602 return cost;
4605 type = signed_type_for (TREE_TYPE (e1));
4606 tree_to_aff_combination (e1, type, &aff_e1);
4607 tree_to_aff_combination (e2, type, &aff_e2);
4608 aff_combination_scale (&aff_e2, -1);
4609 aff_combination_add (&aff_e1, &aff_e2);
4611 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4614 /* Returns true if AFF1 and AFF2 are identical. */
4616 static bool
4617 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4619 unsigned i;
4621 if (aff1->n != aff2->n)
4622 return false;
4624 for (i = 0; i < aff1->n; i++)
4626 if (aff1->elts[i].coef != aff2->elts[i].coef)
4627 return false;
4629 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4630 return false;
4632 return true;
4635 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4637 static int
4638 get_expr_id (struct ivopts_data *data, tree expr)
4640 struct iv_inv_expr_ent ent;
4641 struct iv_inv_expr_ent **slot;
4643 ent.expr = expr;
4644 ent.hash = iterative_hash_expr (expr, 0);
4645 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4646 if (*slot)
4647 return (*slot)->id;
4649 *slot = XNEW (struct iv_inv_expr_ent);
4650 (*slot)->expr = expr;
4651 (*slot)->hash = ent.hash;
4652 (*slot)->id = data->inv_expr_id++;
4653 return (*slot)->id;
4656 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4657 requires a new compiler generated temporary. Returns -1 otherwise.
4658 ADDRESS_P is a flag indicating if the expression is for address
4659 computation. */
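/* For example, UBASE == &a[0] + 4 and CBASE == &a[0] with RATIO == 1
   agree once the constant part is stripped, so no temporary is needed
   (-1 is returned); UBASE == p and CBASE == q, on the other hand,
   generally require materializing p - RATIO * q as a new loop
   invariant.  */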
4661 static int
4662 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4663 tree cbase, HOST_WIDE_INT ratio,
4664 bool address_p)
4666 aff_tree ubase_aff, cbase_aff;
4667 tree expr, ub, cb;
4669 STRIP_NOPS (ubase);
4670 STRIP_NOPS (cbase);
4671 ub = ubase;
4672 cb = cbase;
4674 if ((TREE_CODE (ubase) == INTEGER_CST)
4675 && (TREE_CODE (cbase) == INTEGER_CST))
4676 return -1;
4678 /* Strips the constant part. */
4679 if (TREE_CODE (ubase) == PLUS_EXPR
4680 || TREE_CODE (ubase) == MINUS_EXPR
4681 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4683 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4684 ubase = TREE_OPERAND (ubase, 0);
4687 /* Strips the constant part. */
4688 if (TREE_CODE (cbase) == PLUS_EXPR
4689 || TREE_CODE (cbase) == MINUS_EXPR
4690 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4692 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4693 cbase = TREE_OPERAND (cbase, 0);
4696 if (address_p)
4698 if (((TREE_CODE (ubase) == SSA_NAME)
4699 || (TREE_CODE (ubase) == ADDR_EXPR
4700 && is_gimple_min_invariant (ubase)))
4701 && (TREE_CODE (cbase) == INTEGER_CST))
4702 return -1;
4704 if (((TREE_CODE (cbase) == SSA_NAME)
4705 || (TREE_CODE (cbase) == ADDR_EXPR
4706 && is_gimple_min_invariant (cbase)))
4707 && (TREE_CODE (ubase) == INTEGER_CST))
4708 return -1;
4711 if (ratio == 1)
4713 if (operand_equal_p (ubase, cbase, 0))
4714 return -1;
4716 if (TREE_CODE (ubase) == ADDR_EXPR
4717 && TREE_CODE (cbase) == ADDR_EXPR)
4719 tree usym, csym;
4721 usym = TREE_OPERAND (ubase, 0);
4722 csym = TREE_OPERAND (cbase, 0);
4723 if (TREE_CODE (usym) == ARRAY_REF)
4725 tree ind = TREE_OPERAND (usym, 1);
4726 if (TREE_CODE (ind) == INTEGER_CST
4727 && tree_fits_shwi_p (ind)
4728 && tree_to_shwi (ind) == 0)
4729 usym = TREE_OPERAND (usym, 0);
4731 if (TREE_CODE (csym) == ARRAY_REF)
4733 tree ind = TREE_OPERAND (csym, 1);
4734 if (TREE_CODE (ind) == INTEGER_CST
4735 && tree_fits_shwi_p (ind)
4736 && tree_to_shwi (ind) == 0)
4737 csym = TREE_OPERAND (csym, 0);
4739 if (operand_equal_p (usym, csym, 0))
4740 return -1;
4742 /* Now do a more complex comparison. */
4743 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4744 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4745 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4746 return -1;
4749 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4750 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4752 aff_combination_scale (&cbase_aff, -1 * ratio);
4753 aff_combination_add (&ubase_aff, &cbase_aff);
4754 expr = aff_combination_to_tree (&ubase_aff);
4755 return get_expr_id (data, expr);
4760 /* Determines the cost of the computation by which USE is expressed
4761 from induction variable CAND. If ADDRESS_P is true, we just need
4762 to create an address from it, otherwise we want to get it into a
4763 register. A set of invariants we depend on is stored in
4764 DEPENDS_ON. AT is the statement at which the value is computed.
4765 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4766 addressing is likely. */
4768 static comp_cost
4769 get_computation_cost_at (struct ivopts_data *data,
4770 struct iv_use *use, struct iv_cand *cand,
4771 bool address_p, bitmap *depends_on, gimple *at,
4772 bool *can_autoinc,
4773 int *inv_expr_id)
4775 tree ubase = use->iv->base, ustep = use->iv->step;
4776 tree cbase, cstep;
4777 tree utype = TREE_TYPE (ubase), ctype;
4778 unsigned HOST_WIDE_INT cstepi, offset = 0;
4779 HOST_WIDE_INT ratio, aratio;
4780 bool var_present, symbol_present, stmt_is_after_inc;
4781 comp_cost cost;
4782 widest_int rat;
4783 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4784 machine_mode mem_mode = (address_p
4785 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4786 : VOIDmode);
4788 if (depends_on)
4789 *depends_on = NULL;
4791 /* Only consider real candidates. */
4792 if (!cand->iv)
4793 return infinite_cost;
4795 cbase = cand->iv->base;
4796 cstep = cand->iv->step;
4797 ctype = TREE_TYPE (cbase);
4799 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4801 /* We do not have enough precision to express the values of USE. */
4802 return infinite_cost;
4805 if (address_p
4806 || (use->iv->base_object
4807 && cand->iv->base_object
4808 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4809 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4811 /* Do not try to express address of an object with computation based
4812 on address of a different object. This may cause problems in rtl
4813 level alias analysis (that does not expect this to be happening,
4814 as this is illegal in C), and would be unlikely to be useful
4815 anyway. */
4816 if (use->iv->base_object
4817 && cand->iv->base_object
4818 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4819 return infinite_cost;
4822 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4824 /* TODO -- add direct handling of this case. */
4825 goto fallback;
4828 /* CSTEPI is removed from the offset in case the statement is after the
4829 increment. If the step is not constant, we use zero instead.
4830 This is a bit imprecise (there is the extra addition), but
4831 redundancy elimination is likely to transform the code so that
4832 it uses the value of the variable before the increment anyway,
4833 so this is not all that unrealistic. */
4834 if (cst_and_fits_in_hwi (cstep))
4835 cstepi = int_cst_value (cstep);
4836 else
4837 cstepi = 0;
4839 if (!constant_multiple_of (ustep, cstep, &rat))
4840 return infinite_cost;
4842 if (wi::fits_shwi_p (rat))
4843 ratio = rat.to_shwi ();
4844 else
4845 return infinite_cost;
4847 STRIP_NOPS (cbase);
4848 ctype = TREE_TYPE (cbase);
4850 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4852 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4853 or ratio == 1, it is better to handle this like
4855 ubase - ratio * cbase + ratio * var
4857 (this also holds in the case ratio == -1, TODO). */
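/* When CBASE is a constant, the whole term - ratio * cbase simply
   becomes part of the offset, and only UBASE remains to be
   decomposed.  */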
4859 if (cst_and_fits_in_hwi (cbase))
4861 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4862 cost = difference_cost (data,
4863 ubase, build_int_cst (utype, 0),
4864 &symbol_present, &var_present, &offset,
4865 depends_on);
4866 cost.cost /= avg_loop_niter (data->current_loop);
4868 else if (ratio == 1)
4870 tree real_cbase = cbase;
4872 /* Check to see if any adjustment is needed. */
4873 if (cstepi == 0 && stmt_is_after_inc)
4875 aff_tree real_cbase_aff;
4876 aff_tree cstep_aff;
4878 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4879 &real_cbase_aff);
4880 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4882 aff_combination_add (&real_cbase_aff, &cstep_aff);
4883 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4886 cost = difference_cost (data,
4887 ubase, real_cbase,
4888 &symbol_present, &var_present, &offset,
4889 depends_on);
4890 cost.cost /= avg_loop_niter (data->current_loop);
4892 else if (address_p
4893 && !POINTER_TYPE_P (ctype)
4894 && multiplier_allowed_in_address_p
4895 (ratio, mem_mode,
4896 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4898 if (cstepi == 0 && stmt_is_after_inc)
4900 if (POINTER_TYPE_P (ctype))
4901 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4902 else
4903 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4905 cbase
4906 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4907 cost = difference_cost (data,
4908 ubase, cbase,
4909 &symbol_present, &var_present, &offset,
4910 depends_on);
4911 cost.cost /= avg_loop_niter (data->current_loop);
4913 else
4915 cost = force_var_cost (data, cbase, depends_on);
4916 cost = add_costs (cost,
4917 difference_cost (data,
4918 ubase, build_int_cst (utype, 0),
4919 &symbol_present, &var_present,
4920 &offset, depends_on));
4921 cost.cost /= avg_loop_niter (data->current_loop);
4922 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4925 /* The set of invariants depended on by a sub-use has already been
4926 computed for the first use in the group. */
4927 if (use->sub_id)
4929 cost.cost = 0;
4930 if (depends_on && *depends_on)
4931 bitmap_clear (*depends_on);
4933 else if (inv_expr_id)
4935 *inv_expr_id =
4936 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4937 /* Clear depends on. */
4938 if (*inv_expr_id != -1 && depends_on && *depends_on)
4939 bitmap_clear (*depends_on);
4942 /* If we are after the increment, the value of the candidate is higher by
4943 one iteration. */
4944 if (stmt_is_after_inc)
4945 offset -= ratio * cstepi;
4947 /* Now the computation is of the shape symbol + var1 + const + ratio * var2
4948 (the symbol/var1/const parts may be omitted). If we are looking for an
4949 address, find the cost of addressing this. */
4950 if (address_p)
4951 return add_costs (cost,
4952 get_address_cost (symbol_present, var_present,
4953 offset, ratio, cstepi,
4954 mem_mode,
4955 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4956 speed, stmt_is_after_inc,
4957 can_autoinc));
4959 /* Otherwise estimate the costs for computing the expression. */
4960 if (!symbol_present && !var_present && !offset)
4962 if (ratio != 1)
4963 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4964 return cost;
4967 /* Symbol + offset should be compile-time computable so consider that they
4968 are added once to the variable, if present. */
4969 if (var_present && (symbol_present || offset))
4970 cost.cost += adjust_setup_cost (data,
4971 add_cost (speed, TYPE_MODE (ctype)));
4973 /* Having offset does not affect runtime cost in case it is added to
4974 symbol, but it increases complexity. */
4975 if (offset)
4976 cost.complexity++;
4978 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4980 aratio = ratio > 0 ? ratio : -ratio;
4981 if (aratio != 1)
4982 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4983 return cost;
4985 fallback:
4986 if (can_autoinc)
4987 *can_autoinc = false;
4990 /* Just get the expression, expand it and measure the cost. */
4991 tree comp = get_computation_at (data->current_loop, use, cand, at);
4993 if (!comp)
4994 return infinite_cost;
4996 if (address_p)
4997 comp = build_simple_mem_ref (comp);
4999 return new_cost (computation_cost (comp, speed), 0);
5003 /* Determines the cost of the computation by which USE is expressed
5004 from induction variable CAND. If ADDRESS_P is true, we just need
5005 to create an address from it, otherwise we want to get it into a
5006 register. A set of invariants we depend on is stored in
5007 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5008 autoinc addressing is likely. */
5010 static comp_cost
5011 get_computation_cost (struct ivopts_data *data,
5012 struct iv_use *use, struct iv_cand *cand,
5013 bool address_p, bitmap *depends_on,
5014 bool *can_autoinc, int *inv_expr_id)
5016 return get_computation_cost_at (data,
5017 use, cand, address_p, depends_on, use->stmt,
5018 can_autoinc, inv_expr_id);
5021 /* Determines cost of basing replacement of USE on CAND in a generic
5022 expression. */
5024 static bool
5025 determine_use_iv_cost_generic (struct ivopts_data *data,
5026 struct iv_use *use, struct iv_cand *cand)
5028 bitmap depends_on;
5029 comp_cost cost;
5030 int inv_expr_id = -1;
5032 /* The simple case first -- if we need to express the value of the
5033 preserved original biv, the cost is 0. This also prevents us from
5034 counting the cost of the increment twice -- once at this use and once
5035 in the cost of the candidate. */
5036 if (cand->pos == IP_ORIGINAL
5037 && cand->incremented_at == use->stmt)
5039 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5040 ERROR_MARK, -1);
5041 return true;
5044 cost = get_computation_cost (data, use, cand, false, &depends_on,
5045 NULL, &inv_expr_id);
5047 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5048 inv_expr_id);
5050 return !infinite_cost_p (cost);
5053 /* Determines cost of basing replacement of USE on CAND in an address. */
5055 static bool
5056 determine_use_iv_cost_address (struct ivopts_data *data,
5057 struct iv_use *use, struct iv_cand *cand)
5059 bitmap depends_on;
5060 bool can_autoinc;
5061 int inv_expr_id = -1;
5062 struct iv_use *sub_use;
5063 comp_cost sub_cost;
5064 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5065 &can_autoinc, &inv_expr_id);
5067 if (cand->ainc_use == use)
5069 if (can_autoinc)
5070 cost.cost -= cand->cost_step;
5071 /* If we generated the candidate solely for exploiting autoincrement
5072 opportunities, and it turns out it can't be used, set the cost to
5073 infinity to make sure we ignore it. */
5074 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5075 cost = infinite_cost;
5077 for (sub_use = use->next;
5078 sub_use && !infinite_cost_p (cost);
5079 sub_use = sub_use->next)
5081 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5082 &can_autoinc, NULL);
5083 cost = add_costs (cost, sub_cost);
5086 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5087 inv_expr_id);
5089 return !infinite_cost_p (cost);
5092 /* Computes value of candidate CAND at position AT in iteration NITER, and
5093 stores it to VAL. */
5095 static void
5096 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5097 aff_tree *val)
5099 aff_tree step, delta, nit;
5100 struct iv *iv = cand->iv;
5101 tree type = TREE_TYPE (iv->base);
5102 tree steptype = unsigned_type_for (type);
5107 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5108 aff_combination_convert (&step, steptype);
5109 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5110 aff_combination_convert (&nit, steptype);
5111 aff_combination_mult (&nit, &step, &delta);
5112 if (stmt_after_increment (loop, cand, at))
5113 aff_combination_add (&delta, &step);
5115 tree_to_aff_combination (iv->base, type, val);
5116 if (!POINTER_TYPE_P (type))
5117 aff_combination_convert (val, steptype);
5118 aff_combination_add (val, &delta);
5121 /* Returns the period of induction variable IV. */
5123 static tree
5124 iv_period (struct iv *iv)
5126 tree step = iv->step, period, type;
5127 tree pow2div;
5129 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 type = unsigned_type_for (TREE_TYPE (step));
5132 /* The period of the iv is lcm (step, type_range) / step - 1,
5133 i.e., N * type_range / step - 1. Since the type range is a power
5134 of two, N == step >> num_of_ending_zeros_binary (step),
5135 so the final result is
5137 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
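/* For example, for a 32-bit unsigned iv with step 4 we have
   num_of_ending_zeros_binary (4) == 2, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff: the first 2^30 values of the iv are
   pairwise distinct, and only after that many increments can it wrap
   back to its initial value.  */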
5140 pow2div = num_ending_zeros (step);
5142 period = build_low_bits_mask (type,
5143 (TYPE_PRECISION (type)
5144 - tree_to_uhwi (pow2div)));
5146 return period;
5149 /* Returns the comparison operator used when eliminating the iv USE. */
5151 static enum tree_code
5152 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 struct loop *loop = data->current_loop;
5155 basic_block ex_bb;
5156 edge exit;
5158 ex_bb = gimple_bb (use->stmt);
5159 exit = EDGE_SUCC (ex_bb, 0);
5160 if (flow_bb_inside_loop_p (loop, exit->dest))
5161 exit = EDGE_SUCC (ex_bb, 1);
5163 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5166 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5167 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5168 calculation is performed in a non-wrapping type.
5170 TODO: More generally, we could test for the situation that
5171 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5172 This would require knowing the sign of OFFSET. */
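/* For example, if BASE was computed as p + N and OFFSET expands to the
   same N, then BASE - OFFSET is just p and trivially cannot overflow;
   the affine expansions below are what detect this equality.  */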
5174 static bool
5175 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 enum tree_code code;
5178 tree e1, e2;
5179 aff_tree aff_e1, aff_e2, aff_offset;
5181 if (!nowrap_type_p (TREE_TYPE (base)))
5182 return false;
5184 base = expand_simple_operations (base);
5186 if (TREE_CODE (base) == SSA_NAME)
5188 gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5191 return false;
5193 code = gimple_assign_rhs_code (stmt);
5194 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5195 return false;
5197 e1 = gimple_assign_rhs1 (stmt);
5198 e2 = gimple_assign_rhs2 (stmt);
5200 else
5202 code = TREE_CODE (base);
5203 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5204 return false;
5205 e1 = TREE_OPERAND (base, 0);
5206 e2 = TREE_OPERAND (base, 1);
5209 /* Use affine expansion as deeper inspection to prove the equality. */
5210 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5211 &aff_e2, &data->name_expansion_cache);
5212 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5213 &aff_offset, &data->name_expansion_cache);
5214 aff_combination_scale (&aff_offset, -1);
5215 switch (code)
5217 case PLUS_EXPR:
5218 aff_combination_add (&aff_e2, &aff_offset);
5219 if (aff_combination_zero_p (&aff_e2))
5220 return true;
5222 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5223 &aff_e1, &data->name_expansion_cache);
5224 aff_combination_add (&aff_e1, &aff_offset);
5225 return aff_combination_zero_p (&aff_e1);
5227 case POINTER_PLUS_EXPR:
5228 aff_combination_add (&aff_e2, &aff_offset);
5229 return aff_combination_zero_p (&aff_e2);
5231 default:
5232 return false;
5236 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5237 comparison with CAND. NITER describes the number of iterations of
5238 the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 We aim to handle the following situation:
5242 sometype *base, *p;
5243 int a, b, i;
5245 i = a;
5246 p = p_0 = base + a;
5248 do
5249 {
5250 bla (*p);
5251 p++;
5252 i++;
5253 }
5254 while (i < b);
5256 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5257 We aim to optimize this to
5259 p = p_0 = base + a;
5260 do
5261 {
5262 bla (*p);
5263 p++;
5264 }
5265 while (p < p_0 - a + b);
5267 This preserves the correctness, since the pointer arithmetics does not
5268 overflow. More precisely:
5270 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5271 overflow in computing it or the values of p.
5272 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5273 overflow. To prove this, we use the fact that p_0 = base + a. */
5275 static bool
5276 iv_elimination_compare_lt (struct ivopts_data *data,
5277 struct iv_cand *cand, enum tree_code *comp_p,
5278 struct tree_niter_desc *niter)
5280 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5281 struct aff_tree nit, tmpa, tmpb;
5282 enum tree_code comp;
5283 HOST_WIDE_INT step;
5285 /* We need to know that the candidate induction variable does not overflow.
5286 While more complex analysis may be used to prove this, for now just
5287 check that the variable appears in the original program and that it
5288 is computed in a type that guarantees no overflows. */
5289 cand_type = TREE_TYPE (cand->iv->base);
5290 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5291 return false;
5293 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5294 the calculation of the BOUND could overflow, making the comparison
5295 invalid. */
5296 if (!data->loop_single_exit_p)
5297 return false;
5299 /* We need to be able to decide whether candidate is increasing or decreasing
5300 in order to choose the right comparison operator. */
5301 if (!cst_and_fits_in_hwi (cand->iv->step))
5302 return false;
5303 step = int_cst_value (cand->iv->step);
5305 /* Check that the number of iterations matches the expected pattern:
5306 a + 1 > b ? 0 : b - a - 1. */
5307 mbz = niter->may_be_zero;
5308 if (TREE_CODE (mbz) == GT_EXPR)
5310 /* Handle a + 1 > b. */
5311 tree op0 = TREE_OPERAND (mbz, 0);
5312 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 a = TREE_OPERAND (op0, 0);
5315 b = TREE_OPERAND (mbz, 1);
5317 else
5318 return false;
5320 else if (TREE_CODE (mbz) == LT_EXPR)
5322 tree op1 = TREE_OPERAND (mbz, 1);
5324 /* Handle b < a + 1. */
5325 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 a = TREE_OPERAND (op1, 0);
5328 b = TREE_OPERAND (mbz, 0);
5330 else
5331 return false;
5333 else
5334 return false;
5336 /* Expected number of iterations is B - A - 1. Check that it matches
5337 the actual number, i.e., that B - A - NITER = 1. */
5338 tree_to_aff_combination (niter->niter, nit_type, &nit);
5339 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5340 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5341 aff_combination_scale (&nit, -1);
5342 aff_combination_scale (&tmpa, -1);
5343 aff_combination_add (&tmpb, &tmpa);
5344 aff_combination_add (&tmpb, &nit);
5345 if (tmpb.n != 0 || tmpb.offset != 1)
5346 return false;
5348 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5349 overflow. */
5350 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5351 cand->iv->step,
5352 fold_convert (TREE_TYPE (cand->iv->step), a));
5353 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5354 return false;
5356 /* Determine the new comparison operator. */
5357 comp = step < 0 ? GT_EXPR : LT_EXPR;
5358 if (*comp_p == NE_EXPR)
5359 *comp_p = comp;
5360 else if (*comp_p == EQ_EXPR)
5361 *comp_p = invert_tree_comparison (comp, false);
5362 else
5363 gcc_unreachable ();
5365 return true;
5368 /* Check whether it is possible to express the condition in USE by comparison
5369 of candidate CAND. If so, store the value compared with to BOUND, and the
5370 comparison operator to COMP. */
5372 static bool
5373 may_eliminate_iv (struct ivopts_data *data,
5374 struct iv_use *use, struct iv_cand *cand, tree *bound,
5375 enum tree_code *comp)
5377 basic_block ex_bb;
5378 edge exit;
5379 tree period;
5380 struct loop *loop = data->current_loop;
5381 aff_tree bnd;
5382 struct tree_niter_desc *desc = NULL;
5384 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5385 return false;
5387 /* For now works only for exits that dominate the loop latch.
5388 TODO: extend to other conditions inside loop body. */
5389 ex_bb = gimple_bb (use->stmt);
5390 if (use->stmt != last_stmt (ex_bb)
5391 || gimple_code (use->stmt) != GIMPLE_COND
5392 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5393 return false;
5395 exit = EDGE_SUCC (ex_bb, 0);
5396 if (flow_bb_inside_loop_p (loop, exit->dest))
5397 exit = EDGE_SUCC (ex_bb, 1);
5398 if (flow_bb_inside_loop_p (loop, exit->dest))
5399 return false;
5401 desc = niter_for_exit (data, exit);
5402 if (!desc)
5403 return false;
5405 /* Determine whether we can use the variable to test the exit condition.
5406 This is the case iff the period of the induction variable is greater
5407 than the number of iterations for which the exit condition is true. */
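/* E.g. an unsigned char iv with step 1 has period 255, so it can
   replace the exit test of a loop known to run at most 200 iterations,
   but not of one that may run 300 -- the candidate could wrap before
   the computed bound is reached.  */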
5408 period = iv_period (cand->iv);
5410 /* If the number of iterations is constant, compare against it directly. */
5411 if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 /* See cand_value_at. */
5414 if (stmt_after_increment (loop, cand, use->stmt))
5416 if (!tree_int_cst_lt (desc->niter, period))
5417 return false;
5419 else
5421 if (tree_int_cst_lt (period, desc->niter))
5422 return false;
5426 /* If not, and if this is the only possible exit of the loop, see whether
5427 we can get a conservative estimate on the number of iterations of the
5428 entire loop and compare against that instead. */
5429 else
5431 widest_int period_value, max_niter;
5433 max_niter = desc->max;
5434 if (stmt_after_increment (loop, cand, use->stmt))
5435 max_niter += 1;
5436 period_value = wi::to_widest (period);
5437 if (wi::gtu_p (max_niter, period_value))
5439 /* See if we can take advantage of inferred loop bound information. */
5440 if (data->loop_single_exit_p)
5442 if (!max_loop_iterations (loop, &max_niter))
5443 return false;
5444 /* The loop bound is already adjusted by adding 1. */
5445 if (wi::gtu_p (max_niter, period_value))
5446 return false;
5448 else
5449 return false;
5453 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5455 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5456 aff_combination_to_tree (&bnd));
5457 *comp = iv_elimination_compare (data, use);
5459 /* It is unlikely that computing the number of iterations using division
5460 would be more profitable than keeping the original induction variable. */
5461 if (expression_expensive_p (*bound))
5462 return false;
5464 /* Sometimes it is possible to handle the situation that the number of
5465 iterations may be zero unless additional assumptions hold, by using <
5466 instead of != in the exit condition.
5468 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5469 base the exit condition on it. However, that is often too
5470 expensive. */
5471 if (!integer_zerop (desc->may_be_zero))
5472 return iv_elimination_compare_lt (data, cand, comp, desc);
5474 return true;
5477 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5478 be copied, if it is used in the loop body and DATA->body_includes_call. */
5480 static int
5481 parm_decl_cost (struct ivopts_data *data, tree bound)
5483 tree sbound = bound;
5484 STRIP_NOPS (sbound);
5486 if (TREE_CODE (sbound) == SSA_NAME
5487 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5488 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5489 && data->body_includes_call)
5490 return COSTS_N_INSNS (1);
5492 return 0;
5495 /* Determines cost of basing replacement of USE on CAND in a condition. */
5497 static bool
5498 determine_use_iv_cost_condition (struct ivopts_data *data,
5499 struct iv_use *use, struct iv_cand *cand)
5501 tree bound = NULL_TREE;
5502 struct iv *cmp_iv;
5503 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5504 comp_cost elim_cost, express_cost, cost, bound_cost;
5505 bool ok;
5506 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5507 tree *control_var, *bound_cst;
5508 enum tree_code comp = ERROR_MARK;
5510 /* Only consider real candidates. */
5511 if (!cand->iv)
5513 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5514 ERROR_MARK, -1);
5515 return false;
5518 /* Try iv elimination. */
5519 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5521 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5522 if (elim_cost.cost == 0)
5523 elim_cost.cost = parm_decl_cost (data, bound);
5524 else if (TREE_CODE (bound) == INTEGER_CST)
5525 elim_cost.cost = 0;
5526 /* If we replace a loop condition 'i < n' with 'p < base + n',
5527 depends_on_elim will have 'base' and 'n' set, which implies
5528 that both 'base' and 'n' will be live during the loop. More likely,
5529 'base + n' will be loop invariant, resulting in only one live value
5530 during the loop. So in that case we clear depends_on_elim and set
5531 elim_inv_expr_id instead. */
5532 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5534 elim_inv_expr_id = get_expr_id (data, bound);
5535 bitmap_clear (depends_on_elim);
5537 /* The bound is a loop invariant, so it will be only computed
5538 once. */
5539 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5541 else
5542 elim_cost = infinite_cost;
5544 /* Try expressing the original giv. If it is compared with an invariant,
5545 note that we cannot get rid of it. */
5546 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5547 NULL, &cmp_iv);
5548 gcc_assert (ok);
5550 /* When the condition is a comparison of the candidate IV against
5551 zero, prefer this IV.
5553 TODO: The constant that we're subtracting from the cost should
5554 be target-dependent. This information should be added to the
5555 target costs for each backend. */
5556 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5557 && integer_zerop (*bound_cst)
5558 && (operand_equal_p (*control_var, cand->var_after, 0)
5559 || operand_equal_p (*control_var, cand->var_before, 0)))
5560 elim_cost.cost -= 1;
5562 express_cost = get_computation_cost (data, use, cand, false,
5563 &depends_on_express, NULL,
5564 &express_inv_expr_id);
5565 fd_ivopts_data = data;
5566 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5568 /* Count the cost of the original bound as well. */
5569 bound_cost = force_var_cost (data, *bound_cst, NULL);
5570 if (bound_cost.cost == 0)
5571 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5572 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5573 bound_cost.cost = 0;
5574 express_cost.cost += bound_cost.cost;
5576 /* Choose the better approach, preferring the eliminated IV. */
5577 if (compare_costs (elim_cost, express_cost) <= 0)
5579 cost = elim_cost;
5580 depends_on = depends_on_elim;
5581 depends_on_elim = NULL;
5582 inv_expr_id = elim_inv_expr_id;
5584 else
5586 cost = express_cost;
5587 depends_on = depends_on_express;
5588 depends_on_express = NULL;
5589 bound = NULL_TREE;
5590 comp = ERROR_MARK;
5591 inv_expr_id = express_inv_expr_id;
5594 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5596 if (depends_on_elim)
5597 BITMAP_FREE (depends_on_elim);
5598 if (depends_on_express)
5599 BITMAP_FREE (depends_on_express);
5601 return !infinite_cost_p (cost);
5604 /* Determines cost of basing replacement of USE on CAND. Returns false
5605 if USE cannot be based on CAND. */
5607 static bool
5608 determine_use_iv_cost (struct ivopts_data *data,
5609 struct iv_use *use, struct iv_cand *cand)
5611 switch (use->type)
5613 case USE_NONLINEAR_EXPR:
5614 return determine_use_iv_cost_generic (data, use, cand);
5616 case USE_ADDRESS:
5617 return determine_use_iv_cost_address (data, use, cand);
5619 case USE_COMPARE:
5620 return determine_use_iv_cost_condition (data, use, cand);
5622 default:
5623 gcc_unreachable ();
5627 /* Return true if get_computation_cost indicates that autoincrement is
5628 a possibility for the pair of USE and CAND, false otherwise. */
5630 static bool
5631 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5632 struct iv_cand *cand)
5634 bitmap depends_on;
5635 bool can_autoinc;
5636 comp_cost cost;
5638 if (use->type != USE_ADDRESS)
5639 return false;
5641 cost = get_computation_cost (data, use, cand, true, &depends_on,
5642 &can_autoinc, NULL);
5644 BITMAP_FREE (depends_on);
5646 return !infinite_cost_p (cost) && can_autoinc;
5649 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5650 use that allows autoincrement, and set their AINC_USE if possible. */
5652 static void
5653 set_autoinc_for_original_candidates (struct ivopts_data *data)
5655 unsigned i, j;
5657 for (i = 0; i < n_iv_cands (data); i++)
5659 struct iv_cand *cand = iv_cand (data, i);
5660 struct iv_use *closest_before = NULL;
5661 struct iv_use *closest_after = NULL;
5662 if (cand->pos != IP_ORIGINAL)
5663 continue;
5665 for (j = 0; j < n_iv_uses (data); j++)
5667 struct iv_use *use = iv_use (data, j);
5668 unsigned uid = gimple_uid (use->stmt);
5670 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5671 continue;
5673 if (uid < gimple_uid (cand->incremented_at)
5674 && (closest_before == NULL
5675 || uid > gimple_uid (closest_before->stmt)))
5676 closest_before = use;
5678 if (uid > gimple_uid (cand->incremented_at)
5679 && (closest_after == NULL
5680 || uid < gimple_uid (closest_after->stmt)))
5681 closest_after = use;
5684 if (closest_before != NULL
5685 && autoinc_possible_for_pair (data, closest_before, cand))
5686 cand->ainc_use = closest_before;
5687 else if (closest_after != NULL
5688 && autoinc_possible_for_pair (data, closest_after, cand))
5689 cand->ainc_use = closest_after;
5693 /* Finds the candidates for the induction variables. */
5695 static void
5696 find_iv_candidates (struct ivopts_data *data)
5698 /* Add commonly used ivs. */
5699 add_standard_iv_candidates (data);
5701 /* Add old induction variables. */
5702 add_iv_candidate_for_bivs (data);
5704 /* Add induction variables derived from uses. */
5705 add_iv_candidate_for_uses (data);
5707 set_autoinc_for_original_candidates (data);
5709 /* Record the important candidates. */
5710 record_important_candidates (data);
5713 /* Determines costs of basing the use of the iv on an iv candidate. */
5715 static void
5716 determine_use_iv_costs (struct ivopts_data *data)
5718 unsigned i, j;
5719 struct iv_use *use;
5720 struct iv_cand *cand;
5721 bitmap to_clear = BITMAP_ALLOC (NULL);
5723 alloc_use_cost_map (data);
5725 for (i = 0; i < n_iv_uses (data); i++)
5727 use = iv_use (data, i);
5729 if (data->consider_all_candidates)
5731 for (j = 0; j < n_iv_cands (data); j++)
5733 cand = iv_cand (data, j);
5734 determine_use_iv_cost (data, use, cand);
5737 else
5739 bitmap_iterator bi;
5741 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5743 cand = iv_cand (data, j);
5744 if (!determine_use_iv_cost (data, use, cand))
5745 bitmap_set_bit (to_clear, j);
5748 /* Remove the candidates for which the cost is infinite from
5749 the list of related candidates. */
5750 bitmap_and_compl_into (use->related_cands, to_clear);
5751 bitmap_clear (to_clear);
5755 BITMAP_FREE (to_clear);
5757 if (dump_file && (dump_flags & TDF_DETAILS))
5759 fprintf (dump_file, "Use-candidate costs:\n");
5761 for (i = 0; i < n_iv_uses (data); i++)
5763 use = iv_use (data, i);
5765 fprintf (dump_file, "Use %d:\n", i);
5766 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5767 for (j = 0; j < use->n_map_members; j++)
5769 if (!use->cost_map[j].cand
5770 || infinite_cost_p (use->cost_map[j].cost))
5771 continue;
5773 fprintf (dump_file, " %d\t%d\t%d\t",
5774 use->cost_map[j].cand->id,
5775 use->cost_map[j].cost.cost,
5776 use->cost_map[j].cost.complexity);
5777 if (use->cost_map[j].depends_on)
5778 bitmap_print (dump_file,
5779 use->cost_map[j].depends_on, "","");
5780 if (use->cost_map[j].inv_expr_id != -1)
5781 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5782 fprintf (dump_file, "\n");
5785 fprintf (dump_file, "\n");
5787 fprintf (dump_file, "\n");
5791 /* Determines cost of the candidate CAND. */
5793 static void
5794 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5796 comp_cost cost_base;
5797 unsigned cost, cost_step;
5798 tree base;
5800 if (!cand->iv)
5802 cand->cost = 0;
5803 return;
5806 /* There are two costs associated with the candidate -- its increment
5807 and its initialization. The second is almost negligible for any loop
5808 that rolls enough, so we take it into account only a little. */
5810 base = cand->iv->base;
5811 cost_base = force_var_cost (data, base, NULL);
5812 /* It will be exceptional that the iv register happens to be initialized with
5813 the proper value at no cost. In general, there will at least be a regcopy
5814 or a const set. */
5815 if (cost_base.cost == 0)
5816 cost_base.cost = COSTS_N_INSNS (1);
5817 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5819 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5821 /* Prefer the original ivs unless we may gain something by replacing them.
5822 The reason is to make debugging simpler; this is therefore not relevant
5823 for artificial ivs created by other optimization passes. */
5824 if (cand->pos != IP_ORIGINAL
5825 || !SSA_NAME_VAR (cand->var_before)
5826 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5827 cost++;
5829 /* Prefer not to insert statements into latch unless there are some
5830 already (so that we do not create unnecessary jumps). */
5831 if (cand->pos == IP_END
5832 && empty_block_p (ip_end_pos (data->current_loop)))
5833 cost++;
5835 cand->cost = cost;
5836 cand->cost_step = cost_step;
5839 /* Determines costs of computation of the candidates. */
5841 static void
5842 determine_iv_costs (struct ivopts_data *data)
5844 unsigned i;
5846 if (dump_file && (dump_flags & TDF_DETAILS))
5848 fprintf (dump_file, "Candidate costs:\n");
5849 fprintf (dump_file, " cand\tcost\n");
5852 for (i = 0; i < n_iv_cands (data); i++)
5854 struct iv_cand *cand = iv_cand (data, i);
5856 determine_iv_cost (data, cand);
5858 if (dump_file && (dump_flags & TDF_DETAILS))
5859 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5862 if (dump_file && (dump_flags & TDF_DETAILS))
5863 fprintf (dump_file, "\n");
5866 /* Calculates the cost of having SIZE induction variables. */
5868 static unsigned
5869 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5871 /* We add size to the cost, so that we prefer eliminating ivs
5872 if possible. */
5873 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5874 data->body_includes_call);
5877 /* For each size of the induction variable set determine the penalty. */
5879 static void
5880 determine_set_costs (struct ivopts_data *data)
5882 unsigned j, n;
5883 gphi *phi;
5884 gphi_iterator psi;
5885 tree op;
5886 struct loop *loop = data->current_loop;
5887 bitmap_iterator bi;
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5891 fprintf (dump_file, "Global costs:\n");
5892 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5893 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5894 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5895 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5898 n = 0;
5899 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5901 phi = psi.phi ();
5902 op = PHI_RESULT (phi);
5904 if (virtual_operand_p (op))
5905 continue;
5907 if (get_iv (data, op))
5908 continue;
5910 n++;
5913 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5915 struct version_info *info = ver_info (data, j);
5917 if (info->inv_id && info->has_nonlin_use)
5918 n++;
5921 data->regs_used = n;
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 fprintf (dump_file, " regs_used %d\n", n);
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, " cost for size:\n");
5928 fprintf (dump_file, " ivs\tcost\n");
5929 for (j = 0; j <= 2 * target_avail_regs; j++)
5930 fprintf (dump_file, " %d\t%d\n", j,
5931 ivopts_global_cost_for_size (data, j));
5932 fprintf (dump_file, "\n");
5936 /* Returns true if A is a cheaper cost pair than B. */
5938 static bool
5939 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5941 int cmp;
5943 if (!a)
5944 return false;
5946 if (!b)
5947 return true;
5949 cmp = compare_costs (a->cost, b->cost);
5950 if (cmp < 0)
5951 return true;
5953 if (cmp > 0)
5954 return false;
5956 /* In case the costs are the same, prefer the cheaper candidate. */
5957 if (a->cand->cost < b->cand->cost)
5958 return true;
5960 return false;
5964 /* Returns the candidate by which USE is expressed in IVS. */
5966 static struct cost_pair *
5967 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5969 return ivs->cand_for_use[use->id];
5972 /* Computes the cost field of the IVS structure. */
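/* That is, the sum of the per-use costs, the candidate increment costs,
   and the register-pressure estimate, in which each distinct invariant
   expression in use counts as one more live value.  */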
5974 static void
5975 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5977 comp_cost cost = ivs->cand_use_cost;
5979 cost.cost += ivs->cand_cost;
5981 cost.cost += ivopts_global_cost_for_size (data,
5982 ivs->n_regs + ivs->num_used_inv_expr);
5984 ivs->cost = cost;
5987 /* Remove the invariants in set INVS from set IVS. */
5989 static void
5990 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5992 bitmap_iterator bi;
5993 unsigned iid;
5995 if (!invs)
5996 return;
5998 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6000 ivs->n_invariant_uses[iid]--;
6001 if (ivs->n_invariant_uses[iid] == 0)
6002 ivs->n_regs--;
6006 /* Set USE not to be expressed by any candidate in IVS. */
6008 static void
6009 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6010 struct iv_use *use)
6012 unsigned uid = use->id, cid;
6013 struct cost_pair *cp;
6015 cp = ivs->cand_for_use[uid];
6016 if (!cp)
6017 return;
6018 cid = cp->cand->id;
6020 ivs->bad_uses++;
6021 ivs->cand_for_use[uid] = NULL;
6022 ivs->n_cand_uses[cid]--;
6024 if (ivs->n_cand_uses[cid] == 0)
6026 bitmap_clear_bit (ivs->cands, cid);
6027 /* Do not count the pseudocandidates. */
6028 if (cp->cand->iv)
6029 ivs->n_regs--;
6030 ivs->n_cands--;
6031 ivs->cand_cost -= cp->cand->cost;
6033 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6036 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6038 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6040 if (cp->inv_expr_id != -1)
6042 ivs->used_inv_expr[cp->inv_expr_id]--;
6043 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6044 ivs->num_used_inv_expr--;
6046 iv_ca_recount_cost (data, ivs);
6049 /* Add invariants in set INVS to set IVS. */
6051 static void
6052 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6054 bitmap_iterator bi;
6055 unsigned iid;
6057 if (!invs)
6058 return;
6060 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6062 ivs->n_invariant_uses[iid]++;
6063 if (ivs->n_invariant_uses[iid] == 1)
6064 ivs->n_regs++;
6068 /* Set cost pair for USE in set IVS to CP. */
6070 static void
6071 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6072 struct iv_use *use, struct cost_pair *cp)
6074 unsigned uid = use->id, cid;
6076 if (ivs->cand_for_use[uid] == cp)
6077 return;
6079 if (ivs->cand_for_use[uid])
6080 iv_ca_set_no_cp (data, ivs, use);
6082 if (cp)
6084 cid = cp->cand->id;
6086 ivs->bad_uses--;
6087 ivs->cand_for_use[uid] = cp;
6088 ivs->n_cand_uses[cid]++;
6089 if (ivs->n_cand_uses[cid] == 1)
6091 bitmap_set_bit (ivs->cands, cid);
6092 /* Do not count the pseudocandidates. */
6093 if (cp->cand->iv)
6094 ivs->n_regs++;
6095 ivs->n_cands++;
6096 ivs->cand_cost += cp->cand->cost;
6098 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6101 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6102 iv_ca_set_add_invariants (ivs, cp->depends_on);
6104 if (cp->inv_expr_id != -1)
6106 ivs->used_inv_expr[cp->inv_expr_id]++;
6107 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6108 ivs->num_used_inv_expr++;
6110 iv_ca_recount_cost (data, ivs);
6114 /* Extend set IVS by expressing USE by some of the candidates in it
6115 if possible. Consider all important candidates if candidates in
6116 set IVS don't give any result. */
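/* If even the important candidates offer no finite-cost pairing for
   USE, the BAD_USES counter incremented below stays nonzero, which
   makes iv_ca_cost () of the whole assignment infinite.  */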
6118 static void
6119 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6120 struct iv_use *use)
6122 struct cost_pair *best_cp = NULL, *cp;
6123 bitmap_iterator bi;
6124 unsigned i;
6125 struct iv_cand *cand;
6127 gcc_assert (ivs->upto >= use->id);
6128 ivs->upto++;
6129 ivs->bad_uses++;
6131 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6133 cand = iv_cand (data, i);
6134 cp = get_use_iv_cost (data, use, cand);
6135 if (cheaper_cost_pair (cp, best_cp))
6136 best_cp = cp;
6139 if (best_cp == NULL)
6141 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6143 cand = iv_cand (data, i);
6144 cp = get_use_iv_cost (data, use, cand);
6145 if (cheaper_cost_pair (cp, best_cp))
6146 best_cp = cp;
6150 iv_ca_set_cp (data, ivs, use, best_cp);
6153 /* Get cost for assignment IVS. */
6155 static comp_cost
6156 iv_ca_cost (struct iv_ca *ivs)
6158 /* This was a conditional expression but it triggered a bug in
6159 Sun C 5.5. */
6160 if (ivs->bad_uses)
6161 return infinite_cost;
6162 else
6163 return ivs->cost;
6166 /* Returns true if all dependences of CP are among invariants in IVS. */
6168 static bool
6169 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6171 unsigned i;
6172 bitmap_iterator bi;
6174 if (!cp->depends_on)
6175 return true;
6177 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6179 if (ivs->n_invariant_uses[i] == 0)
6180 return false;
6183 return true;
6186 /* Creates a change record that expresses USE by NEW_CP instead of OLD_CP,
6187 and chains it before NEXT_CHANGE. */
6189 static struct iv_ca_delta *
6190 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6191 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6193 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6195 change->use = use;
6196 change->old_cp = old_cp;
6197 change->new_cp = new_cp;
6198 change->next_change = next_change;
6200 return change;
6203 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6204 are rewritten. */
6206 static struct iv_ca_delta *
6207 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6209 struct iv_ca_delta *last;
6211 if (!l2)
6212 return l1;
6214 if (!l1)
6215 return l2;
6217 for (last = l1; last->next_change; last = last->next_change)
6218 continue;
6219 last->next_change = l2;
6221 return l1;
6224 /* Reverse the list of changes DELTA, forming its inverse. */
6226 static struct iv_ca_delta *
6227 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6229 struct iv_ca_delta *act, *next, *prev = NULL;
6231 for (act = delta; act; act = next)
6233 next = act->next_change;
6234 act->next_change = prev;
6235 prev = act;
6237 std::swap (act->old_cp, act->new_cp);
6240 return prev;
6243 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6244 reverted instead. */
6246 static void
6247 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6248 struct iv_ca_delta *delta, bool forward)
6250 struct cost_pair *from, *to;
6251 struct iv_ca_delta *act;
6253 if (!forward)
6254 delta = iv_ca_delta_reverse (delta);
6256 for (act = delta; act; act = act->next_change)
6258 from = act->old_cp;
6259 to = act->new_cp;
6260 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6261 iv_ca_set_cp (data, ivs, act->use, to);
6264 if (!forward)
6265 iv_ca_delta_reverse (delta);
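/* Editorial note: a minimal sketch of how the delta machinery above is
   typically used; u1, cp_old and cp_new are hypothetical names, not
   entities from this file:

     struct iv_ca_delta *delta = NULL;
     delta = iv_ca_delta_add (u1, cp_old, cp_new, delta);
     iv_ca_delta_commit (data, ivs, delta, true);   // apply tentatively
     comp_cost c = iv_ca_cost (ivs);                // measure the result
     iv_ca_delta_commit (data, ivs, delta, false);  // roll back
     iv_ca_delta_free (&delta);

   Committing with FORWARD == false reverses the list (swapping old_cp and
   new_cp in each record), applies it, and reverses back, which is why a
   commit followed by a rollback restores IVS exactly. */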
6268 /* Returns true if CAND is used in IVS. */
6270 static bool
6271 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6273 return ivs->n_cand_uses[cand->id] > 0;
6276 /* Returns the number of induction variable candidates in the set IVS. */
6278 static unsigned
6279 iv_ca_n_cands (struct iv_ca *ivs)
6281 return ivs->n_cands;
6284 /* Free the list of changes DELTA. */
6286 static void
6287 iv_ca_delta_free (struct iv_ca_delta **delta)
6289 struct iv_ca_delta *act, *next;
6291 for (act = *delta; act; act = next)
6293 next = act->next_change;
6294 free (act);
6297 *delta = NULL;
6300 /* Allocates a new iv candidate assignment. */
6302 static struct iv_ca *
6303 iv_ca_new (struct ivopts_data *data)
6305 struct iv_ca *nw = XNEW (struct iv_ca);
6307 nw->upto = 0;
6308 nw->bad_uses = 0;
6309 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6310 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6311 nw->cands = BITMAP_ALLOC (NULL);
6312 nw->n_cands = 0;
6313 nw->n_regs = 0;
6314 nw->cand_use_cost = no_cost;
6315 nw->cand_cost = 0;
6316 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6317 nw->cost = no_cost;
6318 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6319 nw->num_used_inv_expr = 0;
6321 return nw;
6324 /* Free memory occupied by the set IVS. */
6326 static void
6327 iv_ca_free (struct iv_ca **ivs)
6329 free ((*ivs)->cand_for_use);
6330 free ((*ivs)->n_cand_uses);
6331 BITMAP_FREE ((*ivs)->cands);
6332 free ((*ivs)->n_invariant_uses);
6333 free ((*ivs)->used_inv_expr);
6334 free (*ivs);
6335 *ivs = NULL;
6338 /* Dumps IVS to FILE. */
6340 static void
6341 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6343 const char *pref = " invariants ";
6344 unsigned i;
6345 comp_cost cost = iv_ca_cost (ivs);
6347 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6348 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6349 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6350 bitmap_print (file, ivs->cands, " candidates: ","\n");
6352 for (i = 0; i < ivs->upto; i++)
6354 struct iv_use *use = iv_use (data, i);
6355 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6356 if (cp)
6357 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6358 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6359 else
6360 fprintf (file, " use:%d --> ??\n", use->id);
6363 for (i = 1; i <= data->max_inv_id; i++)
6364 if (ivs->n_invariant_uses[i])
6366 fprintf (file, "%s%d", pref, i);
6367 pref = ", ";
6369 fprintf (file, "\n\n");
6372 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6373 of the new set, and store the differences in DELTA. The number of induction
6374 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6375 is true, the function tries to find a solution with minimal iv candidates. */
6377 static comp_cost
6378 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6379 struct iv_cand *cand, struct iv_ca_delta **delta,
6380 unsigned *n_ivs, bool min_ncand)
6382 unsigned i;
6383 comp_cost cost;
6384 struct iv_use *use;
6385 struct cost_pair *old_cp, *new_cp;
6387 *delta = NULL;
6388 for (i = 0; i < ivs->upto; i++)
6390 use = iv_use (data, i);
6391 old_cp = iv_ca_cand_for_use (ivs, use);
6393 if (old_cp
6394 && old_cp->cand == cand)
6395 continue;
6397 new_cp = get_use_iv_cost (data, use, cand);
6398 if (!new_cp)
6399 continue;
6401 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6402 continue;
6404 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6405 continue;
6407 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6410 iv_ca_delta_commit (data, ivs, *delta, true);
6411 cost = iv_ca_cost (ivs);
6412 if (n_ivs)
6413 *n_ivs = iv_ca_n_cands (ivs);
6414 iv_ca_delta_commit (data, ivs, *delta, false);
6416 return cost;
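/* Editorial note: iv_ca_extend evaluates CAND without permanently
   changing IVS -- it commits *DELTA, reads the resulting cost, and
   immediately rolls the commit back. Callers that like the returned
   cost re-commit the same delta themselves (see try_improve_iv_set). */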
6419 /* Try narrowing set IVS by removing CAND. Return the cost of
6420 the new set and store the differences in DELTA. START is
6421 the candidate with which we start narrowing. */
6423 static comp_cost
6424 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6425 struct iv_cand *cand, struct iv_cand *start,
6426 struct iv_ca_delta **delta)
6428 unsigned i, ci;
6429 struct iv_use *use;
6430 struct cost_pair *old_cp, *new_cp, *cp;
6431 bitmap_iterator bi;
6432 struct iv_cand *cnd;
6433 comp_cost cost, best_cost, acost;
6435 *delta = NULL;
6436 for (i = 0; i < n_iv_uses (data); i++)
6438 use = iv_use (data, i);
6440 old_cp = iv_ca_cand_for_use (ivs, use);
6441 if (old_cp->cand != cand)
6442 continue;
6444 best_cost = iv_ca_cost (ivs);
6445 /* Start narrowing with START. */
6446 new_cp = get_use_iv_cost (data, use, start);
6448 if (data->consider_all_candidates)
6450 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6452 if (ci == cand->id || (start && ci == start->id))
6453 continue;
6455 cnd = iv_cand (data, ci);
6457 cp = get_use_iv_cost (data, use, cnd);
6458 if (!cp)
6459 continue;
6461 iv_ca_set_cp (data, ivs, use, cp);
6462 acost = iv_ca_cost (ivs);
6464 if (compare_costs (acost, best_cost) < 0)
6466 best_cost = acost;
6467 new_cp = cp;
6471 else
6473 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6475 if (ci == cand->id || (start && ci == start->id))
6476 continue;
6478 cnd = iv_cand (data, ci);
6480 cp = get_use_iv_cost (data, use, cnd);
6481 if (!cp)
6482 continue;
6484 iv_ca_set_cp (data, ivs, use, cp);
6485 acost = iv_ca_cost (ivs);
6487 if (compare_costs (acost, best_cost) < 0)
6489 best_cost = acost;
6490 new_cp = cp;
6494 /* Restore the old cp for this use. */
6495 iv_ca_set_cp (data, ivs, use, old_cp);
6497 if (!new_cp)
6499 iv_ca_delta_free (delta);
6500 return infinite_cost;
6503 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6506 iv_ca_delta_commit (data, ivs, *delta, true);
6507 cost = iv_ca_cost (ivs);
6508 iv_ca_delta_commit (data, ivs, *delta, false);
6510 return cost;
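/* Editorial note: in iv_ca_narrow, "narrowing" means removing CAND from
   the set: every use currently expressed by CAND must be re-expressed by
   some other candidate in IVS (START is tried first), and the cheapest
   such reassignment is accumulated in *DELTA. If any use cannot be moved
   off CAND, the narrowing fails and infinite_cost is returned. */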
6513 /* Try optimizing the set of candidates IVS by removing candidates other
6514 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6515 differences in DELTA. */
6517 static comp_cost
6518 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6519 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6521 bitmap_iterator bi;
6522 struct iv_ca_delta *act_delta, *best_delta;
6523 unsigned i;
6524 comp_cost best_cost, acost;
6525 struct iv_cand *cand;
6527 best_delta = NULL;
6528 best_cost = iv_ca_cost (ivs);
6530 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6532 cand = iv_cand (data, i);
6534 if (cand == except_cand)
6535 continue;
6537 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6539 if (compare_costs (acost, best_cost) < 0)
6541 best_cost = acost;
6542 iv_ca_delta_free (&best_delta);
6543 best_delta = act_delta;
6545 else
6546 iv_ca_delta_free (&act_delta);
6549 if (!best_delta)
6551 *delta = NULL;
6552 return best_cost;
6555 /* Recurse to possibly remove other unnecessary ivs. */
6556 iv_ca_delta_commit (data, ivs, best_delta, true);
6557 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6558 iv_ca_delta_commit (data, ivs, best_delta, false);
6559 *delta = iv_ca_delta_join (best_delta, *delta);
6560 return best_cost;
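/* Editorial note: iv_ca_prune repeats iv_ca_narrow greedily -- it removes
   the single candidate whose removal helps most, commits that change, and
   recurses to try removing further candidates, joining the partial deltas
   into one list for the caller. For instance, a hypothetical set
   {c1, c2, c3} may shrink to {c1} in two recursive steps if both removals
   lower the cost. */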
6563 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6564 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6565 its cost_pair; otherwise just return BEST_CP. */
6567 static struct cost_pair*
6568 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6569 unsigned int cand_idx, struct iv_cand *old_cand,
6570 struct cost_pair *best_cp)
6572 struct iv_cand *cand;
6573 struct cost_pair *cp;
6575 gcc_assert (old_cand != NULL && best_cp != NULL);
6576 if (cand_idx == old_cand->id)
6577 return best_cp;
6579 cand = iv_cand (data, cand_idx);
6580 cp = get_use_iv_cost (data, use, cand);
6581 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6582 return cp;
6584 return best_cp;
6587 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6588 that are used by more than one iv use. For each such candidate, this
6589 function tries to represent the iv uses under that candidate using other
6590 candidates with lower local cost, then tries to prune the new set. If the
6591 new set has a lower cost, it returns the new cost after recording the
6592 candidate replacement in the list DELTA. */
6594 static comp_cost
6595 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6596 struct iv_ca_delta **delta)
6598 bitmap_iterator bi, bj;
6599 unsigned int i, j, k;
6600 struct iv_use *use;
6601 struct iv_cand *cand;
6602 comp_cost orig_cost, acost;
6603 struct iv_ca_delta *act_delta, *tmp_delta;
6604 struct cost_pair *old_cp, *best_cp = NULL;
6606 *delta = NULL;
6607 orig_cost = iv_ca_cost (ivs);
6609 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6611 if (ivs->n_cand_uses[i] == 1
6612 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6613 continue;
6615 cand = iv_cand (data, i);
6617 act_delta = NULL;
6618 /* Represent uses under current candidate using other ones with
6619 lower local cost. */
6620 for (j = 0; j < ivs->upto; j++)
6622 use = iv_use (data, j);
6623 old_cp = iv_ca_cand_for_use (ivs, use);
6625 if (old_cp->cand != cand)
6626 continue;
6628 best_cp = old_cp;
6629 if (data->consider_all_candidates)
6630 for (k = 0; k < n_iv_cands (data); k++)
6631 best_cp = cheaper_cost_with_cand (data, use, k,
6632 old_cp->cand, best_cp);
6633 else
6634 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6635 best_cp = cheaper_cost_with_cand (data, use, k,
6636 old_cp->cand, best_cp);
6638 if (best_cp == old_cp)
6639 continue;
6641 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6643 /* No need for further pruning. */
6644 if (!act_delta)
6645 continue;
6647 /* Prune the new candidate set. */
6648 iv_ca_delta_commit (data, ivs, act_delta, true);
6649 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6650 iv_ca_delta_commit (data, ivs, act_delta, false);
6651 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6653 if (compare_costs (acost, orig_cost) < 0)
6655 *delta = act_delta;
6656 return acost;
6658 else
6659 iv_ca_delta_free (&act_delta);
6662 return orig_cost;
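/* Editorial note: a hypothetical scenario in which the replacement above
   helps. Suppose uses u1 and u2 both picked candidate c1 because each
   greedy step looked at one use at a time, but u1 is locally cheaper
   under c2 and u2 under c3. iv_ca_replace moves u1 to c2 and u2 to c3,
   then lets iv_ca_prune drop the now-unused c1; if the combined cost
   drops, the whole delta is returned to the caller. */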
6665 /* Tries to extend the set IVS in the best possible way in order
6666 to express USE. If ORIGINALP is true, prefer candidates from
6667 the original set of IVs, otherwise favor important candidates not
6668 based on any memory object. */
6670 static bool
6671 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6672 struct iv_use *use, bool originalp)
6674 comp_cost best_cost, act_cost;
6675 unsigned i;
6676 bitmap_iterator bi;
6677 struct iv_cand *cand;
6678 struct iv_ca_delta *best_delta = NULL, *act_delta;
6679 struct cost_pair *cp;
6681 iv_ca_add_use (data, ivs, use);
6682 best_cost = iv_ca_cost (ivs);
6683 cp = iv_ca_cand_for_use (ivs, use);
6684 if (cp)
6686 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6687 iv_ca_set_no_cp (data, ivs, use);
6690 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6691 first try important candidates not based on any memory object. Only if
6692 this fails, try the specific ones. Rationale -- in loops with many
6693 variables the best choice is often to use just one generic biv. If we
6694 added many ivs specific to the uses here, the optimization algorithm would
6695 later be likely to get stuck in a local minimum, thus causing us to create
6696 too many ivs. Going from few ivs to more seems more likely to be
6697 successful -- starting from few ivs, replacing an expensive use by a
6698 specific iv should always be a win. */
6699 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6701 cand = iv_cand (data, i);
6703 if (originalp && cand->pos != IP_ORIGINAL)
6704 continue;
6706 if (!originalp && cand->iv->base_object != NULL_TREE)
6707 continue;
6709 if (iv_ca_cand_used_p (ivs, cand))
6710 continue;
6712 cp = get_use_iv_cost (data, use, cand);
6713 if (!cp)
6714 continue;
6716 iv_ca_set_cp (data, ivs, use, cp);
6717 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6718 true);
6719 iv_ca_set_no_cp (data, ivs, use);
6720 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6722 if (compare_costs (act_cost, best_cost) < 0)
6724 best_cost = act_cost;
6726 iv_ca_delta_free (&best_delta);
6727 best_delta = act_delta;
6729 else
6730 iv_ca_delta_free (&act_delta);
6733 if (infinite_cost_p (best_cost))
6735 for (i = 0; i < use->n_map_members; i++)
6737 cp = use->cost_map + i;
6738 cand = cp->cand;
6739 if (!cand)
6740 continue;
6742 /* Already tried this. */
6743 if (cand->important)
6745 if (originalp && cand->pos == IP_ORIGINAL)
6746 continue;
6747 if (!originalp && cand->iv->base_object == NULL_TREE)
6748 continue;
6751 if (iv_ca_cand_used_p (ivs, cand))
6752 continue;
6754 act_delta = NULL;
6755 iv_ca_set_cp (data, ivs, use, cp);
6756 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6757 iv_ca_set_no_cp (data, ivs, use);
6758 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6759 cp, act_delta);
6761 if (compare_costs (act_cost, best_cost) < 0)
6763 best_cost = act_cost;
6765 if (best_delta)
6766 iv_ca_delta_free (&best_delta);
6767 best_delta = act_delta;
6769 else
6770 iv_ca_delta_free (&act_delta);
6774 iv_ca_delta_commit (data, ivs, best_delta, true);
6775 iv_ca_delta_free (&best_delta);
6777 return !infinite_cost_p (best_cost);
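/* Editorial note: the search order in try_add_cand_for is therefore
   1) candidates already in IVS (via iv_ca_add_use), 2) related candidates
   filtered by ORIGINALP, and 3) as a fallback while the cost is still
   infinite, the remaining entries of the use's cost map. The cheapest
   delta found across these stages is committed. */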
6780 /* Finds an initial assignment of candidates to uses. */
6782 static struct iv_ca *
6783 get_initial_solution (struct ivopts_data *data, bool originalp)
6785 struct iv_ca *ivs = iv_ca_new (data);
6786 unsigned i;
6788 for (i = 0; i < n_iv_uses (data); i++)
6789 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6791 iv_ca_free (&ivs);
6792 return NULL;
6795 return ivs;
6798 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6799 points to a bool variable; if it is true, this function tries to break
6800 the local optimal fixed-point by replacing candidates in IVS. */
6802 static bool
6803 try_improve_iv_set (struct ivopts_data *data,
6804 struct iv_ca *ivs, bool *try_replace_p)
6806 unsigned i, n_ivs;
6807 comp_cost acost, best_cost = iv_ca_cost (ivs);
6808 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6809 struct iv_cand *cand;
6811 /* Try extending the set of induction variables by one. */
6812 for (i = 0; i < n_iv_cands (data); i++)
6814 cand = iv_cand (data, i);
6816 if (iv_ca_cand_used_p (ivs, cand))
6817 continue;
6819 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6820 if (!act_delta)
6821 continue;
6823 /* If we successfully added the candidate and the set is small enough,
6824 try optimizing it by removing other candidates. */
6825 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6827 iv_ca_delta_commit (data, ivs, act_delta, true);
6828 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6829 iv_ca_delta_commit (data, ivs, act_delta, false);
6830 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6833 if (compare_costs (acost, best_cost) < 0)
6835 best_cost = acost;
6836 iv_ca_delta_free (&best_delta);
6837 best_delta = act_delta;
6839 else
6840 iv_ca_delta_free (&act_delta);
6843 if (!best_delta)
6845 /* Try removing the candidates from the set instead. */
6846 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6848 if (!best_delta && *try_replace_p)
6850 *try_replace_p = false;
6851 /* So far the candidate-selecting algorithm tends to choose few IVs,
6852 which lets it handle cases in which loops have many variables
6853 but the best choice is often to use only one general biv. One
6854 weakness is that it can't handle the opposite case, in which
6855 different candidates should be chosen for different uses. To solve
6856 this problem, we replace candidates in the manner described in
6857 the comments of iv_ca_replace, thus giving the general algorithm a
6858 chance to break the local optimal fixed-point in these cases. */
6859 best_cost = iv_ca_replace (data, ivs, &best_delta);
6862 if (!best_delta)
6863 return false;
6866 iv_ca_delta_commit (data, ivs, best_delta, true);
6867 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6868 iv_ca_delta_free (&best_delta);
6869 return true;
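/* Editorial note: one iteration of the greedy improvement above tries, in
   order: (1) adding each unused candidate (iv_ca_extend), pruning after a
   successful addition when the set is small enough; (2) if no addition
   helped, pure removal (iv_ca_prune); and (3) once per invocation of the
   solver, candidate replacement (iv_ca_replace). The best delta found is
   committed, and the return value says whether the cost improved. */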
6872 /* Attempts to find the optimal set of induction variables. We use a simple
6873 greedy heuristic: we try to replace at most one candidate in the selected
6874 solution and remove the unused ivs while this improves the cost. */
6876 static struct iv_ca *
6877 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6879 struct iv_ca *set;
6880 bool try_replace_p = true;
6882 /* Get the initial solution. */
6883 set = get_initial_solution (data, originalp);
6884 if (!set)
6886 if (dump_file && (dump_flags & TDF_DETAILS))
6887 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6888 return NULL;
6891 if (dump_file && (dump_flags & TDF_DETAILS))
6893 fprintf (dump_file, "Initial set of candidates:\n");
6894 iv_ca_dump (data, dump_file, set);
6897 while (try_improve_iv_set (data, set, &try_replace_p))
6899 if (dump_file && (dump_flags & TDF_DETAILS))
6901 fprintf (dump_file, "Improved to:\n");
6902 iv_ca_dump (data, dump_file, set);
6906 return set;
6909 static struct iv_ca *
6910 find_optimal_iv_set (struct ivopts_data *data)
6912 unsigned i;
6913 struct iv_ca *set, *origset;
6914 struct iv_use *use;
6915 comp_cost cost, origcost;
6917 /* Determine the cost based on a strategy that starts with the original IVs,
6918 then try again using a strategy that prefers candidates not based
6919 on any IVs. */
6920 origset = find_optimal_iv_set_1 (data, true);
6921 set = find_optimal_iv_set_1 (data, false);
6923 if (!origset && !set)
6924 return NULL;
6926 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6927 cost = set ? iv_ca_cost (set) : infinite_cost;
6929 if (dump_file && (dump_flags & TDF_DETAILS))
6931 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6932 origcost.cost, origcost.complexity);
6933 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6934 cost.cost, cost.complexity);
6937 /* Choose the one with the best cost. */
6938 if (compare_costs (origcost, cost) <= 0)
6940 if (set)
6941 iv_ca_free (&set);
6942 set = origset;
6944 else if (origset)
6945 iv_ca_free (&origset);
6947 for (i = 0; i < n_iv_uses (data); i++)
6949 use = iv_use (data, i);
6950 use->selected = iv_ca_cand_for_use (set, use)->cand;
6953 return set;
6956 /* Creates a new induction variable corresponding to CAND. */
6958 static void
6959 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6961 gimple_stmt_iterator incr_pos;
6962 tree base;
6963 bool after = false;
6965 if (!cand->iv)
6966 return;
6968 switch (cand->pos)
6970 case IP_NORMAL:
6971 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6972 break;
6974 case IP_END:
6975 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6976 after = true;
6977 break;
6979 case IP_AFTER_USE:
6980 after = true;
6981 /* fall through */
6982 case IP_BEFORE_USE:
6983 incr_pos = gsi_for_stmt (cand->incremented_at);
6984 break;
6986 case IP_ORIGINAL:
6987 /* Mark that the iv is preserved. */
6988 name_info (data, cand->var_before)->preserve_biv = true;
6989 name_info (data, cand->var_after)->preserve_biv = true;
6991 /* Rewrite the increment so that it uses var_before directly. */
6992 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6993 return;
6996 gimple_add_tmp_var (cand->var_before);
6998 base = unshare_expr (cand->iv->base);
7000 create_iv (base, unshare_expr (cand->iv->step),
7001 cand->var_before, data->current_loop,
7002 &incr_pos, after, &cand->var_before, &cand->var_after);
7005 /* Creates new induction variables described in SET. */
7007 static void
7008 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7010 unsigned i;
7011 struct iv_cand *cand;
7012 bitmap_iterator bi;
7014 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7016 cand = iv_cand (data, i);
7017 create_new_iv (data, cand);
7020 if (dump_file && (dump_flags & TDF_DETAILS))
7022 fprintf (dump_file, "Selected IV set for loop %d",
7023 data->current_loop->num);
7024 if (data->loop_loc != UNKNOWN_LOCATION)
7025 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7026 LOCATION_LINE (data->loop_loc));
7027 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7028 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7030 cand = iv_cand (data, i);
7031 dump_cand (dump_file, cand);
7033 fprintf (dump_file, "\n");
7037 /* Rewrites USE (definition of iv used in a nonlinear expression)
7038 using candidate CAND. */
7040 static void
7041 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7042 struct iv_use *use, struct iv_cand *cand)
7044 tree comp;
7045 tree op, tgt;
7046 gassign *ass;
7047 gimple_stmt_iterator bsi;
7049 /* An important special case -- if we are asked to express value of
7050 the original iv by itself, just exit; there is no need to
7051 introduce a new computation (that might also need casting the
7052 variable to unsigned and back). */
7053 if (cand->pos == IP_ORIGINAL
7054 && cand->incremented_at == use->stmt)
7056 enum tree_code stmt_code;
7058 gcc_assert (is_gimple_assign (use->stmt));
7059 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7061 /* Check whether we may leave the computation unchanged.
7062 This is the case only if it does not rely on other
7063 computations in the loop -- otherwise, the computation
7064 we rely upon may be removed in remove_unused_ivs,
7065 thus leading to an ICE. */
7066 stmt_code = gimple_assign_rhs_code (use->stmt);
7067 if (stmt_code == PLUS_EXPR
7068 || stmt_code == MINUS_EXPR
7069 || stmt_code == POINTER_PLUS_EXPR)
7071 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7072 op = gimple_assign_rhs2 (use->stmt);
7073 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7074 op = gimple_assign_rhs1 (use->stmt);
7075 else
7076 op = NULL_TREE;
7078 else
7079 op = NULL_TREE;
7081 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7082 return;
7085 comp = get_computation (data->current_loop, use, cand);
7086 gcc_assert (comp != NULL_TREE);
7088 switch (gimple_code (use->stmt))
7090 case GIMPLE_PHI:
7091 tgt = PHI_RESULT (use->stmt);
7093 /* If we should keep the biv, do not replace it. */
7094 if (name_info (data, tgt)->preserve_biv)
7095 return;
7097 bsi = gsi_after_labels (gimple_bb (use->stmt));
7098 break;
7100 case GIMPLE_ASSIGN:
7101 tgt = gimple_assign_lhs (use->stmt);
7102 bsi = gsi_for_stmt (use->stmt);
7103 break;
7105 default:
7106 gcc_unreachable ();
7109 if (!valid_gimple_rhs_p (comp)
7110 || (gimple_code (use->stmt) != GIMPLE_PHI
7111 /* We can't allow re-allocating the stmt as it might be pointed
7112 to still. */
7113 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7114 >= gimple_num_ops (gsi_stmt (bsi)))))
7116 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7117 true, GSI_SAME_STMT);
7118 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7120 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7121 /* As this isn't a plain copy, we have to reset the alignment
7122 information. */
7123 if (SSA_NAME_PTR_INFO (comp))
7124 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7128 if (gimple_code (use->stmt) == GIMPLE_PHI)
7130 ass = gimple_build_assign (tgt, comp);
7131 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7133 bsi = gsi_for_stmt (use->stmt);
7134 remove_phi_node (&bsi, false);
7136 else
7138 gimple_assign_set_rhs_from_tree (&bsi, comp);
7139 use->stmt = gsi_stmt (bsi);
7143 /* Performs a peephole optimization to reorder the iv update statement with
7144 a mem ref to enable instruction combining in later phases. The mem ref uses
7145 the iv value before the update, so the reordering transformation requires
7146 adjustment of the offset. CAND is the selected IV_CAND.
7148 Example:
7150 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7151 iv2 = iv1 + 1;
7153 if (t < val) (1)
7154 goto L;
7155 goto Head;
7158 directly propagating t over to (1) will introduce an overlapping live range,
7159 thus increasing register pressure. This peephole transforms it into:
7162 iv2 = iv1 + 1;
7163 t = MEM_REF (base, iv2, 8, 8);
7164 if (t < val)
7165 goto L;
7166 goto Head;
7169 static void
7170 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7172 tree var_after;
7173 gimple *iv_update, *stmt;
7174 basic_block bb;
7175 gimple_stmt_iterator gsi, gsi_iv;
7177 if (cand->pos != IP_NORMAL)
7178 return;
7180 var_after = cand->var_after;
7181 iv_update = SSA_NAME_DEF_STMT (var_after);
7183 bb = gimple_bb (iv_update);
7184 gsi = gsi_last_nondebug_bb (bb);
7185 stmt = gsi_stmt (gsi);
7187 /* Only handle conditional statement for now. */
7188 if (gimple_code (stmt) != GIMPLE_COND)
7189 return;
7191 gsi_prev_nondebug (&gsi);
7192 stmt = gsi_stmt (gsi);
7193 if (stmt != iv_update)
7194 return;
7196 gsi_prev_nondebug (&gsi);
7197 if (gsi_end_p (gsi))
7198 return;
7200 stmt = gsi_stmt (gsi);
7201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7202 return;
7204 if (stmt != use->stmt)
7205 return;
7207 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7208 return;
7210 if (dump_file && (dump_flags & TDF_DETAILS))
7212 fprintf (dump_file, "Reordering \n");
7213 print_gimple_stmt (dump_file, iv_update, 0, 0);
7214 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7215 fprintf (dump_file, "\n");
7218 gsi = gsi_for_stmt (use->stmt);
7219 gsi_iv = gsi_for_stmt (iv_update);
7220 gsi_move_before (&gsi_iv, &gsi);
7222 cand->pos = IP_BEFORE_USE;
7223 cand->incremented_at = use->stmt;
7226 /* Rewrites USE (address that is an iv) using candidate CAND. */
7228 static void
7229 rewrite_use_address_1 (struct ivopts_data *data,
7230 struct iv_use *use, struct iv_cand *cand)
7232 aff_tree aff;
7233 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7234 tree base_hint = NULL_TREE;
7235 tree ref, iv;
7236 bool ok;
7238 adjust_iv_update_pos (cand, use);
7239 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7240 gcc_assert (ok);
7241 unshare_aff_combination (&aff);
7243 /* To avoid undefined overflow problems, all IV candidates use unsigned
7244 integer types. The drawback is that this makes it impossible for
7245 create_mem_ref to distinguish an IV that is based on a memory object
7246 from one that represents simply an offset.
7248 To work around this problem, we pass a hint to create_mem_ref that
7249 indicates which variable (if any) in aff is an IV based on a memory
7250 object. Note that we only consider the candidate. If this is not
7251 based on an object, the base of the reference is in some subexpression
7252 of the use -- but these will use pointer types, so they are recognized
7253 by the create_mem_ref heuristics anyway. */
7254 if (cand->iv->base_object)
7255 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7257 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7258 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7259 reference_alias_ptr_type (*use->op_p),
7260 iv, base_hint, data->speed);
7261 copy_ref_info (ref, *use->op_p);
7262 *use->op_p = ref;
7265 /* Rewrites USE (address that is an iv) using candidate CAND. If it is the
7266 first use of a group, rewrites the sub-uses in the group too. */
7268 static void
7269 rewrite_use_address (struct ivopts_data *data,
7270 struct iv_use *use, struct iv_cand *cand)
7272 struct iv_use *next;
7274 gcc_assert (use->sub_id == 0);
7275 rewrite_use_address_1 (data, use, cand);
7276 update_stmt (use->stmt);
7278 for (next = use->next; next != NULL; next = next->next)
7280 rewrite_use_address_1 (data, next, cand);
7281 update_stmt (next->stmt);
7284 return;
7287 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7288 candidate CAND. */
7290 static void
7291 rewrite_use_compare (struct ivopts_data *data,
7292 struct iv_use *use, struct iv_cand *cand)
7294 tree comp, *var_p, op, bound;
7295 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7296 enum tree_code compare;
7297 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7298 bool ok;
7300 bound = cp->value;
7301 if (bound)
7303 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7304 tree var_type = TREE_TYPE (var);
7305 gimple_seq stmts;
7307 if (dump_file && (dump_flags & TDF_DETAILS))
7309 fprintf (dump_file, "Replacing exit test: ");
7310 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7312 compare = cp->comp;
7313 bound = unshare_expr (fold_convert (var_type, bound));
7314 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7315 if (stmts)
7316 gsi_insert_seq_on_edge_immediate (
7317 loop_preheader_edge (data->current_loop),
7318 stmts);
7320 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7321 gimple_cond_set_lhs (cond_stmt, var);
7322 gimple_cond_set_code (cond_stmt, compare);
7323 gimple_cond_set_rhs (cond_stmt, op);
7324 return;
7327 /* The induction variable elimination failed; just express the original
7328 giv. */
7329 comp = get_computation (data->current_loop, use, cand);
7330 gcc_assert (comp != NULL_TREE);
7332 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7333 gcc_assert (ok);
7335 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7336 true, GSI_SAME_STMT);
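/* Editorial note: a hypothetical before/after for the bound case above,
   with the original iv i replaced by the selected candidate ivtmp:

     before:  if (i_12 < n_7) goto <body>;
     after:   if (ivtmp_23 != _24) goto <body>;

   where _24 holds the bound computed in the loop preheader by the
   force_gimple_operand call; the SSA names are illustrative only. */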
7339 /* Rewrites USE using candidate CAND. */
7341 static void
7342 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7344 switch (use->type)
7346 case USE_NONLINEAR_EXPR:
7347 rewrite_use_nonlinear_expr (data, use, cand);
7348 break;
7350 case USE_ADDRESS:
7351 rewrite_use_address (data, use, cand);
7352 break;
7354 case USE_COMPARE:
7355 rewrite_use_compare (data, use, cand);
7356 break;
7358 default:
7359 gcc_unreachable ();
7362 update_stmt (use->stmt);
7365 /* Rewrite the uses using the selected induction variables. */
7367 static void
7368 rewrite_uses (struct ivopts_data *data)
7370 unsigned i;
7371 struct iv_cand *cand;
7372 struct iv_use *use;
7374 for (i = 0; i < n_iv_uses (data); i++)
7376 use = iv_use (data, i);
7377 cand = use->selected;
7378 gcc_assert (cand);
7380 rewrite_use (data, use, cand);
7384 /* Removes the ivs that are not used after rewriting. */
7386 static void
7387 remove_unused_ivs (struct ivopts_data *data)
7389 unsigned j;
7390 bitmap_iterator bi;
7391 bitmap toremove = BITMAP_ALLOC (NULL);
7393 /* Figure out an order in which to release SSA DEFs so that we don't
7394 release something that we'd have to propagate into a debug stmt
7395 afterwards. */
7396 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7398 struct version_info *info;
7400 info = ver_info (data, j);
7401 if (info->iv
7402 && !integer_zerop (info->iv->step)
7403 && !info->inv_id
7404 && !info->iv->have_use_for
7405 && !info->preserve_biv)
7407 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7409 tree def = info->iv->ssa_name;
7411 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7413 imm_use_iterator imm_iter;
7414 use_operand_p use_p;
7415 gimple *stmt;
7416 int count = 0;
7418 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7420 if (!gimple_debug_bind_p (stmt))
7421 continue;
7423 /* We just want to determine whether to do nothing
7424 (count == 0), to substitute the computed
7425 expression into a single use of the SSA DEF by
7426 itself (count == 1), or to use a debug temp
7427 because the SSA DEF is used multiple times or as
7428 part of a larger expression (count > 1). */
7429 count++;
7430 if (gimple_debug_bind_get_value (stmt) != def)
7431 count++;
7433 if (count > 1)
7434 BREAK_FROM_IMM_USE_STMT (imm_iter);
7437 if (!count)
7438 continue;
7440 struct iv_use dummy_use;
7441 struct iv_cand *best_cand = NULL, *cand;
7442 unsigned i, best_pref = 0, cand_pref;
7444 memset (&dummy_use, 0, sizeof (dummy_use));
7445 dummy_use.iv = info->iv;
7446 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7448 cand = iv_use (data, i)->selected;
7449 if (cand == best_cand)
7450 continue;
7451 cand_pref = operand_equal_p (cand->iv->step,
7452 info->iv->step, 0)
7453 ? 4 : 0;
7454 cand_pref
7455 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7456 == TYPE_MODE (TREE_TYPE (info->iv->base))
7457 ? 2 : 0;
7458 cand_pref
7459 += TREE_CODE (cand->iv->base) == INTEGER_CST
7460 ? 1 : 0;
7461 if (best_cand == NULL || best_pref < cand_pref)
7463 best_cand = cand;
7464 best_pref = cand_pref;
7468 if (!best_cand)
7469 continue;
7471 tree comp = get_computation_at (data->current_loop,
7472 &dummy_use, best_cand,
7473 SSA_NAME_DEF_STMT (def));
7474 if (!comp)
7475 continue;
7477 if (count > 1)
7479 tree vexpr = make_node (DEBUG_EXPR_DECL);
7480 DECL_ARTIFICIAL (vexpr) = 1;
7481 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7482 if (SSA_NAME_VAR (def))
7483 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7484 else
7485 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7486 gdebug *def_temp
7487 = gimple_build_debug_bind (vexpr, comp, NULL);
7488 gimple_stmt_iterator gsi;
7490 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7491 gsi = gsi_after_labels (gimple_bb
7492 (SSA_NAME_DEF_STMT (def)));
7493 else
7494 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7496 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7497 comp = vexpr;
7500 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7502 if (!gimple_debug_bind_p (stmt))
7503 continue;
7505 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7506 SET_USE (use_p, comp);
7508 update_stmt (stmt);
7514 release_defs_bitset (toremove);
7516 BITMAP_FREE (toremove);
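/* Editorial note: for the debug-stmt handling above, a made-up example:
   if a dead biv i appears in several "# DEBUG i => i_3" binds, the code
   builds a single DEBUG_EXPR_DECL (printed as D#1) bound to the
   equivalent computation in terms of a selected candidate, then redirects
   all the binds to it, so debug information survives the biv's removal
   without duplicating the expression. */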
7519 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7520 for hash_map::traverse. */
7522 bool
7523 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7525 free (value);
7526 return true;
7529 /* Frees data allocated by the optimization of a single loop. */
7531 static void
7532 free_loop_data (struct ivopts_data *data)
7534 unsigned i, j;
7535 bitmap_iterator bi;
7536 tree obj;
7538 if (data->niters)
7540 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7541 delete data->niters;
7542 data->niters = NULL;
7545 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7547 struct version_info *info;
7549 info = ver_info (data, i);
7550 info->iv = NULL;
7551 info->has_nonlin_use = false;
7552 info->preserve_biv = false;
7553 info->inv_id = 0;
7555 bitmap_clear (data->relevant);
7556 bitmap_clear (data->important_candidates);
7558 for (i = 0; i < n_iv_uses (data); i++)
7560 struct iv_use *use = iv_use (data, i);
7561 struct iv_use *pre = use, *sub = use->next;
7563 while (sub)
7565 gcc_assert (sub->related_cands == NULL);
7566 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7568 pre = sub;
7569 sub = sub->next;
7570 free (pre);
7573 BITMAP_FREE (use->related_cands);
7574 for (j = 0; j < use->n_map_members; j++)
7575 if (use->cost_map[j].depends_on)
7576 BITMAP_FREE (use->cost_map[j].depends_on);
7577 free (use->cost_map);
7578 free (use);
7580 data->iv_uses.truncate (0);
7582 for (i = 0; i < n_iv_cands (data); i++)
7584 struct iv_cand *cand = iv_cand (data, i);
7586 if (cand->depends_on)
7587 BITMAP_FREE (cand->depends_on);
7588 free (cand);
7590 data->iv_candidates.truncate (0);
7592 if (data->version_info_size < num_ssa_names)
7594 data->version_info_size = 2 * num_ssa_names;
7595 free (data->version_info);
7596 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7599 data->max_inv_id = 0;
7601 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7602 SET_DECL_RTL (obj, NULL_RTX);
7604 decl_rtl_to_reset.truncate (0);
7606 data->inv_expr_tab->empty ();
7607 data->inv_expr_id = 0;
7609 data->iv_common_cand_tab->empty ();
7610 data->iv_common_cands.truncate (0);
7613 /* Finalizes the data structures used by the iv optimization pass. */
7616 static void
7617 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7619 free_loop_data (data);
7620 free (data->version_info);
7621 BITMAP_FREE (data->relevant);
7622 BITMAP_FREE (data->important_candidates);
7624 decl_rtl_to_reset.release ();
7625 data->iv_uses.release ();
7626 data->iv_candidates.release ();
7627 delete data->inv_expr_tab;
7628 data->inv_expr_tab = NULL;
7629 free_affine_expand_cache (&data->name_expansion_cache);
7630 delete data->iv_common_cand_tab;
7631 data->iv_common_cand_tab = NULL;
7632 data->iv_common_cands.release ();
7633 obstack_free (&data->iv_obstack, NULL);
7636 /* Returns true if the loop body BODY includes any function calls. */
7638 static bool
7639 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7641 gimple_stmt_iterator gsi;
7642 unsigned i;
7644 for (i = 0; i < num_nodes; i++)
7645 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7647 gimple *stmt = gsi_stmt (gsi);
7648 if (is_gimple_call (stmt)
7649 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7650 return true;
7652 return false;
7655 /* Optimizes the LOOP. Returns true if anything changed. */
7657 static bool
7658 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7660 bool changed = false;
7661 struct iv_ca *iv_ca;
7662 edge exit = single_dom_exit (loop);
7663 basic_block *body;
7665 gcc_assert (!data->niters);
7666 data->current_loop = loop;
7667 data->loop_loc = find_loop_location (loop);
7668 data->speed = optimize_loop_for_speed_p (loop);
7670 if (dump_file && (dump_flags & TDF_DETAILS))
7672 fprintf (dump_file, "Processing loop %d", loop->num);
7673 if (data->loop_loc != UNKNOWN_LOCATION)
7674 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7675 LOCATION_LINE (data->loop_loc));
7676 fprintf (dump_file, "\n");
7678 if (exit)
7680 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7681 exit->src->index, exit->dest->index);
7682 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7683 fprintf (dump_file, "\n");
7686 fprintf (dump_file, "\n");
7689 body = get_loop_body (loop);
7690 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7691 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7692 free (body);
7694 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7696 /* For each ssa name determines whether it behaves as an induction variable
7697 in some loop. */
7698 if (!find_induction_variables (data))
7699 goto finish;
7701 /* Finds interesting uses (item 1). */
7702 find_interesting_uses (data);
7703 group_address_uses (data);
7704 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7705 goto finish;
7707 /* Finds candidates for the induction variables (item 2). */
7708 find_iv_candidates (data);
7710 /* Calculates the costs (item 3, part 1). */
7711 determine_iv_costs (data);
7712 determine_use_iv_costs (data);
7713 determine_set_costs (data);
7715 /* Find the optimal set of induction variables (item 3, part 2). */
7716 iv_ca = find_optimal_iv_set (data);
7717 if (!iv_ca)
7718 goto finish;
7719 changed = true;
7721 /* Create the new induction variables (item 4, part 1). */
7722 create_new_ivs (data, iv_ca);
7723 iv_ca_free (&iv_ca);
7725 /* Rewrite the uses (item 4, part 2). */
7726 rewrite_uses (data);
7728 /* Remove the ivs that are unused after rewriting. */
7729 remove_unused_ivs (data);
7731 /* We have changed the structure of induction variables; it might happen
7732 that definitions in the scev database refer to some of them that were
7733 eliminated. */
7734 scev_reset ();
7736 finish:
7737 free_loop_data (data);
7739 return changed;
7742 /* Main entry point. Optimizes induction variables in loops. */
7744 void
7745 tree_ssa_iv_optimize (void)
7747 struct loop *loop;
7748 struct ivopts_data data;
7750 tree_ssa_iv_optimize_init (&data);
7752 /* Optimize the loops starting with the innermost ones. */
7753 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7755 if (dump_file && (dump_flags & TDF_DETAILS))
7756 flow_loop_dump (loop, dump_file, NULL, 1);
7758 tree_ssa_iv_optimize_loop (&data, loop);
7761 tree_ssa_iv_optimize_finalize (&data);