[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24     coalescing and induction variable elimination.  It does so in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39     3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45     the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60     possible; it might give better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
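/* As an illustration (a hypothetical example, not taken from this pass):
   given a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 finds the address use a[i] and the compare i < n, step 2 adds
   (among others) a pointer candidate advancing by sizeof (*a) each
   iteration, and if that candidate wins in step 3, step 4 may rewrite
   the loop into the equivalent of

     for (p = &a[0]; p < &a[n]; p++)
       sum += *p;

   strength-reducing the implicit multiply in a[i] and eliminating i.  */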
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
110 /* The infinite cost. */
111 #define INFTY 10000000
113 #define AVG_LOOP_NITER(LOOP) 5
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop *loop)
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
126 return niter;
129 /* Representation of the induction variable. */
130 struct iv
132 tree base; /* Initial value of the iv. */
133   tree base_object;    /* A memory object to which the induction variable points.  */
134 tree step; /* Step of the iv (constant only). */
135 tree ssa_name; /* The ssa name with the value. */
136   unsigned use_id;     /* The id of the use recorded for this iv, valid when have_use_for is set.  */
137 bool biv_p; /* Is it a biv? */
138 bool have_use_for; /* Do we already have a use for it? */
139 bool no_overflow; /* True if the iv doesn't overflow. */
140   bool have_address_use;/* For a biv, whether it is used in any address
141 type use. */
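/* For instance (illustrative only): in a loop containing "i = 10; ...;
   i += 2", the iv describing i has base 10, step 2 and a NULL
   base_object (i is not a pointer); biv_p is set when i is incremented
   directly in the loop rather than derived from another iv.  */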
144 /* Per-ssa version information (induction variable descriptions, etc.). */
145 struct version_info
147 tree name; /* The ssa name. */
148 struct iv *iv; /* Induction variable description. */
149 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv; /* For the original biv, whether to preserve it. */
152 unsigned inv_id; /* Id of an invariant. */
155 /* Types of uses. */
156 enum use_type
158 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
159 USE_ADDRESS, /* Use in an address. */
160 USE_COMPARE /* Use is a compare. */
163 /* Cost of a computation. */
164 struct comp_cost
166 int cost; /* The runtime cost. */
167 unsigned complexity; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
173 static const comp_cost no_cost = {0, 0};
174 static const comp_cost infinite_cost = {INFTY, INFTY};
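/* Illustrative, made-up values: computing an address as base + 4 * i
   might be {cost = 8, complexity = 2}, while reusing a pointer that is
   bumped by 4 each iteration might be {cost = 4, complexity = 1}.  The
   pass prefers the lower runtime cost and uses complexity only to break
   ties between otherwise equally cheap alternatives.  */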
176 /* The candidate - cost pair. */
177 struct cost_pair
179 struct iv_cand *cand; /* The candidate. */
180 comp_cost cost; /* The cost. */
181 bitmap depends_on; /* The list of invariants that have to be
182 preserved. */
183 tree value; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp; /* For iv elimination, the comparison. */
187 int inv_expr_id; /* Loop invariant expression id. */
190 /* Use. */
191 struct iv_use
193 unsigned id; /* The id of the use. */
194 unsigned sub_id; /* The id of the sub use. */
195 enum use_type type; /* Type of the use. */
196 struct iv *iv; /* The induction variable it is based on. */
197 gimple *stmt; /* Statement in that it occurs. */
198 tree *op_p; /* The place where it occurs. */
199 bitmap related_cands; /* The set of "related" iv candidates, plus the common
200 important ones. */
202 unsigned n_map_members; /* Number of candidates in the cost_map list. */
203 struct cost_pair *cost_map;
204   /* The costs with respect to the iv candidates.  */
206 struct iv_cand *selected;
207 /* The selected candidate. */
209 struct iv_use *next; /* The next sub use. */
210 tree addr_base; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset;
212 /* Const offset stripped from base address. */
215 /* The position where the iv is computed. */
216 enum iv_position
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
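/* A sketch of the positions (hypothetical gimple, exit test at the
   bottom of the body):

     <bb body>:
       ...
       i_7 = i_1 + 1;                   <- IP_NORMAL: just before the test
       if (i_7 < n_2) goto <bb body>;

   IP_END instead places the increment in the latch block, after the exit
   test, while IP_BEFORE_USE/IP_AFTER_USE anchor it at a particular use.  */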
225 /* The induction variable candidate. */
226 struct iv_cand
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple *incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
246 struct iv *orig_iv; /* The original iv if this cand is added from biv with
247 smaller type. */
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
253 tree base;
254 tree step;
255 /* IV uses from which this common candidate is derived. */
256 auto_vec<iv_use *> uses;
257 hashval_t hash;
260 /* Hashtable helpers. */
262 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
264 static inline hashval_t hash (const iv_common_cand *);
265 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
268 /* Hash function for possible common candidates. */
270 inline hashval_t
271 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
273 return ccand->hash;
276 /* Hash table equality function for common candidates. */
278 inline bool
279 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
280 const iv_common_cand *ccand2)
282 return (ccand1->hash == ccand2->hash
283 && operand_equal_p (ccand1->base, ccand2->base, 0)
284 && operand_equal_p (ccand1->step, ccand2->step, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
286 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
292 tree expr;
293 int id;
294 hashval_t hash;
297 /* Hashtable helpers. */
299 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
301 static inline hashval_t hash (const iv_inv_expr_ent *);
302 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
305 /* Hash function for loop invariant expressions. */
307 inline hashval_t
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
310 return expr->hash;
313 /* Hash table equality function for expressions. */
315 inline bool
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
317 const iv_inv_expr_ent *expr2)
319 return expr1->hash == expr2->hash
320 && operand_equal_p (expr1->expr, expr2->expr, 0);
323 struct ivopts_data
325 /* The currently optimized loop. */
326 struct loop *current_loop;
327 source_location loop_loc;
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map<edge, tree_niter_desc *> *niters;
332 /* Number of registers used in it. */
333 unsigned regs_used;
335 /* The size of version_info array allocated. */
336 unsigned version_info_size;
338 /* The array of information for the ssa names. */
339 struct version_info *version_info;
341 /* The hashtable of loop invariant expressions created
342 by ivopt. */
343 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
345 /* Loop invariant expression id. */
346 int inv_expr_id;
348 /* The bitmap of indices in version_info whose value was changed. */
349 bitmap relevant;
351 /* The uses of induction variables. */
352 vec<iv_use *> iv_uses;
354 /* The candidates. */
355 vec<iv_cand *> iv_candidates;
357 /* A bitmap of important candidates. */
358 bitmap important_candidates;
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map<tree, name_expansion *> *name_expansion_cache;
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
366 /* The common candidates. */
367 vec<iv_common_cand *> iv_common_cands;
369 /* The maximum invariant id. */
370 unsigned max_inv_id;
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr;
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack;
378 /* Whether to consider just related and important candidates when replacing a
379 use. */
380 bool consider_all_candidates;
382 /* Are we optimizing for speed? */
383 bool speed;
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call;
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p;
392 /* An assignment of iv candidates to uses. */
394 struct iv_ca
396 /* The number of uses covered by the assignment. */
397 unsigned upto;
399 /* Number of uses that cannot be expressed by the candidates in the set. */
400 unsigned bad_uses;
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair **cand_for_use;
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses;
408 /* The candidates used. */
409 bitmap cands;
411 /* The number of candidates in the set. */
412 unsigned n_cands;
414 /* Total number of registers needed. */
415 unsigned n_regs;
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost;
420 /* Total cost of candidates. */
421 unsigned cand_cost;
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses;
426 /* The array holding the number of uses of each loop
427 invariant expressions created by ivopt. */
428 unsigned *used_inv_expr;
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr;
433 /* Total cost of the assignment. */
434 comp_cost cost;
437 /* Difference of two iv candidate assignments. */
439 struct iv_ca_delta
441 /* Changed use. */
442 struct iv_use *use;
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair *old_cp;
447 /* A new assignment. */
448 struct cost_pair *new_cp;
450 /* Next change in the list. */
451 struct iv_ca_delta *next_change;
454 /* Bound on number of candidates below that all candidates are considered. */
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
465 /* If there are at most this number of ivs in the set, always try removing
466    unnecessary ivs from the set.  */
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
471 /* The list of trees for which the decl_rtl field must be reset is stored
472 here. */
474 static vec<tree> decl_rtl_to_reset;
476 static comp_cost force_expr_to_var_cost (tree, bool);
478 /* Number of uses recorded in DATA. */
480 static inline unsigned
481 n_iv_uses (struct ivopts_data *data)
483 return data->iv_uses.length ();
486 /* Ith use recorded in DATA. */
488 static inline struct iv_use *
489 iv_use (struct ivopts_data *data, unsigned i)
491 return data->iv_uses[i];
494 /* Number of candidates recorded in DATA. */
496 static inline unsigned
497 n_iv_cands (struct ivopts_data *data)
499 return data->iv_candidates.length ();
502 /* Ith candidate recorded in DATA. */
504 static inline struct iv_cand *
505 iv_cand (struct ivopts_data *data, unsigned i)
507 return data->iv_candidates[i];
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
512 edge
513 single_dom_exit (struct loop *loop)
515 edge exit = single_exit (loop);
517 if (!exit)
518 return NULL;
520 if (!just_once_each_iteration_p (loop, exit->src))
521 return NULL;
523 return exit;
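/* E.g. (illustration): in "while (i < n) { ... }" with no other way out,
   the exit edge leaves the header, which runs on every iteration and so
   dominates the latch; that edge is returned.  An exit buried inside a
   conditional in the body is not executed once per iteration, so NULL
   would be returned for it.  */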
526 /* Dumps information about the induction variable IV to FILE. */
528 void
529 dump_iv (FILE *file, struct iv *iv, bool dump_name)
531 if (iv->ssa_name && dump_name)
533 fprintf (file, "ssa name ");
534 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
535 fprintf (file, "\n");
538 fprintf (file, " type ");
539 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
540 fprintf (file, "\n");
542 if (iv->step)
544 fprintf (file, " base ");
545 print_generic_expr (file, iv->base, TDF_SLIM);
546 fprintf (file, "\n");
548 fprintf (file, " step ");
549 print_generic_expr (file, iv->step, TDF_SLIM);
550 fprintf (file, "\n");
552 else
554 fprintf (file, " invariant ");
555 print_generic_expr (file, iv->base, TDF_SLIM);
556 fprintf (file, "\n");
559 if (iv->base_object)
561 fprintf (file, " base object ");
562 print_generic_expr (file, iv->base_object, TDF_SLIM);
563 fprintf (file, "\n");
566 if (iv->biv_p)
567 fprintf (file, " is a biv\n");
569 if (iv->no_overflow)
570 fprintf (file, " iv doesn't overflow wrto loop niter\n");
573 /* Dumps information about the USE to FILE. */
575 void
576 dump_use (FILE *file, struct iv_use *use)
578 fprintf (file, "use %d", use->id);
579 if (use->sub_id)
580 fprintf (file, ".%d", use->sub_id);
582 fprintf (file, "\n");
584 switch (use->type)
586 case USE_NONLINEAR_EXPR:
587 fprintf (file, " generic\n");
588 break;
590 case USE_ADDRESS:
591 fprintf (file, " address\n");
592 break;
594 case USE_COMPARE:
595 fprintf (file, " compare\n");
596 break;
598 default:
599 gcc_unreachable ();
602 fprintf (file, " in statement ");
603 print_gimple_stmt (file, use->stmt, 0, 0);
604 fprintf (file, "\n");
606 fprintf (file, " at position ");
607 if (use->op_p)
608 print_generic_expr (file, *use->op_p, TDF_SLIM);
609 fprintf (file, "\n");
611 dump_iv (file, use->iv, false);
613 if (use->related_cands)
615 fprintf (file, " related candidates ");
616 dump_bitmap (file, use->related_cands);
620 /* Dumps information about the uses to FILE. */
622 void
623 dump_uses (FILE *file, struct ivopts_data *data)
625 unsigned i;
626 struct iv_use *use;
628 for (i = 0; i < n_iv_uses (data); i++)
630 use = iv_use (data, i);
633 dump_use (file, use);
634 use = use->next;
636 while (use);
637 fprintf (file, "\n");
641 /* Dumps information about induction variable candidate CAND to FILE. */
643 void
644 dump_cand (FILE *file, struct iv_cand *cand)
646 struct iv *iv = cand->iv;
648 fprintf (file, "candidate %d%s\n",
649 cand->id, cand->important ? " (important)" : "");
651 if (cand->depends_on)
653 fprintf (file, " depends on ");
654 dump_bitmap (file, cand->depends_on);
657 if (!iv)
659 fprintf (file, " final value replacement\n");
660 return;
663 if (cand->var_before)
665 fprintf (file, " var_before ");
666 print_generic_expr (file, cand->var_before, TDF_SLIM);
667 fprintf (file, "\n");
669 if (cand->var_after)
671 fprintf (file, " var_after ");
672 print_generic_expr (file, cand->var_after, TDF_SLIM);
673 fprintf (file, "\n");
676 switch (cand->pos)
678 case IP_NORMAL:
679 fprintf (file, " incremented before exit test\n");
680 break;
682 case IP_BEFORE_USE:
683 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
684 break;
686 case IP_AFTER_USE:
687 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
688 break;
690 case IP_END:
691 fprintf (file, " incremented at end\n");
692 break;
694 case IP_ORIGINAL:
695 fprintf (file, " original biv\n");
696 break;
699 dump_iv (file, iv, false);
702 /* Returns the info for ssa version VER. */
704 static inline struct version_info *
705 ver_info (struct ivopts_data *data, unsigned ver)
707 return data->version_info + ver;
710 /* Returns the info for ssa name NAME. */
712 static inline struct version_info *
713 name_info (struct ivopts_data *data, tree name)
715 return ver_info (data, SSA_NAME_VERSION (name));
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
719 emitted in LOOP. */
721 static bool
722 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
724 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
726 gcc_assert (bb);
728 if (sbb == loop->latch)
729 return true;
731 if (sbb != bb)
732 return false;
734 return stmt == last_stmt (bb);
737 /* Returns true if STMT is after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
741 static bool
742 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
744 basic_block cand_bb = gimple_bb (cand->incremented_at);
745 basic_block stmt_bb = gimple_bb (stmt);
747 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
748 return false;
750 if (stmt_bb != cand_bb)
751 return true;
753 if (true_if_equal
754 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
755 return true;
756 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
759 /* Returns true if STMT is after the place where the induction variable
760 CAND is incremented in LOOP. */
762 static bool
763 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
765 switch (cand->pos)
767 case IP_END:
768 return false;
770 case IP_NORMAL:
771 return stmt_after_ip_normal_pos (loop, stmt);
773 case IP_ORIGINAL:
774 case IP_AFTER_USE:
775 return stmt_after_inc_pos (cand, stmt, false);
777 case IP_BEFORE_USE:
778 return stmt_after_inc_pos (cand, stmt, true);
780 default:
781 gcc_unreachable ();
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
787 static bool
788 abnormal_ssa_name_p (tree exp)
790 if (!exp)
791 return false;
793 if (TREE_CODE (exp) != SSA_NAME)
794 return false;
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
802 static bool
803 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
804 void *data ATTRIBUTE_UNUSED)
806 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
808 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
809 return false;
810 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
811 return false;
814 return !abnormal_ssa_name_p (*index);
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
820 bool
821 contains_abnormal_ssa_name_p (tree expr)
823 enum tree_code code;
824 enum tree_code_class codeclass;
826 if (!expr)
827 return false;
829 code = TREE_CODE (expr);
830 codeclass = TREE_CODE_CLASS (code);
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
844 if (code == COND_EXPR)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
849 switch (codeclass)
851 case tcc_binary:
852 case tcc_comparison:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
854 return true;
856 /* Fallthru. */
857 case tcc_unary:
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
859 return true;
861 break;
863 default:
864 gcc_unreachable ();
867 return false;
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
873 static struct tree_niter_desc *
874 niter_for_exit (struct ivopts_data *data, edge exit)
876 struct tree_niter_desc *desc;
877 tree_niter_desc **slot;
879 if (!data->niters)
881 data->niters = new hash_map<edge, tree_niter_desc *>;
882 slot = NULL;
884 else
885 slot = data->niters->get (exit);
887 if (!slot)
889       /* Try to determine the number of iterations.  We avoid working with ssa
890          names that appear in phi nodes on abnormal edges, so that we do not
891 create overlapping life ranges for them (PR 27283). */
892 desc = XNEW (struct tree_niter_desc);
893 if (!number_of_iterations_exit (data->current_loop,
894 exit, desc, true)
895 || contains_abnormal_ssa_name_p (desc->niter))
897 XDELETE (desc);
898 desc = NULL;
900 data->niters->put (exit, desc);
902 else
903 desc = *slot;
905 return desc;
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
910 goes wrong. */
912 static struct tree_niter_desc *
913 niter_for_single_dom_exit (struct ivopts_data *data)
915 edge exit = single_dom_exit (data->current_loop);
917 if (!exit)
918 return NULL;
920 return niter_for_exit (data, exit);
923 /* Initializes data structures used by the iv optimization pass, stored
924 in DATA. */
926 static void
927 tree_ssa_iv_optimize_init (struct ivopts_data *data)
929 data->version_info_size = 2 * num_ssa_names;
930 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
931 data->relevant = BITMAP_ALLOC (NULL);
932 data->important_candidates = BITMAP_ALLOC (NULL);
933 data->max_inv_id = 0;
934 data->niters = NULL;
935 data->iv_uses.create (20);
936 data->iv_candidates.create (20);
937 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
938 data->inv_expr_id = 0;
939 data->name_expansion_cache = NULL;
940 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
941 data->iv_common_cands.create (20);
942 decl_rtl_to_reset.create (20);
943 gcc_obstack_init (&data->iv_obstack);
946 /* Returns a memory object to which EXPR points.  In case we are able to
947 determine that it does not point to any such object, NULL is returned. */
949 static tree
950 determine_base_object (tree expr)
952 enum tree_code code = TREE_CODE (expr);
953 tree base, obj;
955   /* If this is a pointer cast to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr))
959 return determine_base_object (TREE_OPERAND (expr, 0));
961 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
962 return NULL_TREE;
964 switch (code)
966 case INTEGER_CST:
967 return NULL_TREE;
969 case ADDR_EXPR:
970 obj = TREE_OPERAND (expr, 0);
971 base = get_base_address (obj);
973 if (!base)
974 return expr;
976 if (TREE_CODE (base) == MEM_REF)
977 return determine_base_object (TREE_OPERAND (base, 0));
979 return fold_convert (ptr_type_node,
980 build_fold_addr_expr (base));
982 case POINTER_PLUS_EXPR:
983 return determine_base_object (TREE_OPERAND (expr, 0));
985 case PLUS_EXPR:
986 case MINUS_EXPR:
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
988 gcc_unreachable ();
990 default:
991 return fold_convert (ptr_type_node, expr);
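/* Illustrative results (hypothetical SSA names): for &a[i_1] the base
   object is &a; for p_2 + 4 (a POINTER_PLUS_EXPR) the function recurses
   into p_2 and returns it converted to ptr_type_node; for an integer
   constant, even one cast to a pointer type, it returns NULL_TREE.  */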
995 /* Return true if an address expression with a non-DECL_P operand appears
996 in EXPR. */
998 static bool
999 contain_complex_addr_expr (tree expr)
1001 bool res = false;
1003 STRIP_NOPS (expr);
1004 switch (TREE_CODE (expr))
1006 case POINTER_PLUS_EXPR:
1007 case PLUS_EXPR:
1008 case MINUS_EXPR:
1009 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1010 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1011 break;
1013 case ADDR_EXPR:
1014 return (!DECL_P (TREE_OPERAND (expr, 0)));
1016 default:
1017 return false;
1020 return res;
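/* E.g. (illustration): &a[0] contains a complex address expression,
   since the ADDR_EXPR operand a[0] is an ARRAY_REF rather than a
   declaration, whereas plain &a does not, its operand being the DECL
   itself.  */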
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1026 static struct iv *
1027 alloc_iv (struct ivopts_data *data, tree base, tree step,
1028 bool no_overflow = false)
1030 tree expr = base;
1031 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1032 sizeof (struct iv));
1033 gcc_assert (step != NULL_TREE);
1035 /* Lower address expression in base except ones with DECL_P as operand.
1036 By doing this:
1037 1) More accurate cost can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1040 STRIP_NOPS (expr);
1041 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1042 || contain_complex_addr_expr (expr))
1044 aff_tree comb;
1045 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1046 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1049 iv->base = base;
1050 iv->base_object = determine_base_object (base);
1051 iv->step = step;
1052 iv->biv_p = false;
1053 iv->have_use_for = false;
1054 iv->use_id = 0;
1055 iv->ssa_name = NULL_TREE;
1056 iv->no_overflow = no_overflow;
1057 iv->have_address_use = false;
1059 return iv;
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1065 static void
1066 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1067 bool no_overflow)
1069 struct version_info *info = name_info (data, iv);
1071 gcc_assert (!info->iv);
1073 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1074 info->iv = alloc_iv (data, base, step, no_overflow);
1075 info->iv->ssa_name = iv;
1078 /* Finds induction variable declaration for VAR. */
1080 static struct iv *
1081 get_iv (struct ivopts_data *data, tree var)
1083 basic_block bb;
1084 tree type = TREE_TYPE (var);
1086 if (!POINTER_TYPE_P (type)
1087 && !INTEGRAL_TYPE_P (type))
1088 return NULL;
1090 if (!name_info (data, var)->iv)
1092 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1094 if (!bb
1095 || !flow_bb_inside_loop_p (data->current_loop, bb))
1096 set_iv (data, var, var, build_int_cst (type, 0), true);
1099 return name_info (data, var)->iv;
1102 /* Return the first non-invariant ssa var found in EXPR. */
1104 static tree
1105 extract_single_var_from_expr (tree expr)
1107 int i, n;
1108 tree tmp;
1109 enum tree_code code;
1111 if (!expr || is_gimple_min_invariant (expr))
1112 return NULL;
1114 code = TREE_CODE (expr);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1117 n = TREE_OPERAND_LENGTH (expr);
1118 for (i = 0; i < n; i++)
1120 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1122 if (tmp)
1123 return tmp;
1126 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1129 /* Finds basic ivs. */
1131 static bool
1132 find_bivs (struct ivopts_data *data)
1134 gphi *phi;
1135 affine_iv iv;
1136 tree step, type, base, stop;
1137 bool found = false;
1138 struct loop *loop = data->current_loop;
1139 gphi_iterator psi;
1141 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1143 phi = psi.phi ();
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1146 continue;
1148 if (virtual_operand_p (PHI_RESULT (phi)))
1149 continue;
1151 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1152 continue;
1154 if (integer_zerop (iv.step))
1155 continue;
1157 step = iv.step;
1158 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1159       /* Stop expanding the iv base at the first ssa var referred to by the
1160          iv step.  Ideally we would stop at any ssa var, but since that is
1161          expensive and such cases are unusual, we just do it for the first one.
1163 See PR64705 for the rationale. */
1164 stop = extract_single_var_from_expr (step);
1165 base = expand_simple_operations (base, stop);
1166 if (contains_abnormal_ssa_name_p (base)
1167 || contains_abnormal_ssa_name_p (step))
1168 continue;
1170 type = TREE_TYPE (PHI_RESULT (phi));
1171 base = fold_convert (type, base);
1172 if (step)
1174 if (POINTER_TYPE_P (type))
1175 step = convert_to_ptrofftype (step);
1176 else
1177 step = fold_convert (type, step);
1180 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1181 found = true;
1184 return found;
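/* A sketch of what this recognizes (hypothetical SSA form):

     <bb header>:
       i_1 = PHI <0 (preheader), i_7 (latch)>
       ...
       i_7 = i_1 + 1;

   records an iv for i_1 with base 0 and step 1, with no_overflow set
   when the scalar evolution analysis can prove the absence of wrapping.  */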
1187 /* Marks basic ivs. */
1189 static void
1190 mark_bivs (struct ivopts_data *data)
1192 gphi *phi;
1193 gimple *def;
1194 tree var;
1195 struct iv *iv, *incr_iv;
1196 struct loop *loop = data->current_loop;
1197 basic_block incr_bb;
1198 gphi_iterator psi;
1200 data->bivs_not_used_in_addr = 0;
1201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1203 phi = psi.phi ();
1205 iv = get_iv (data, PHI_RESULT (phi));
1206 if (!iv)
1207 continue;
1209 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1210 def = SSA_NAME_DEF_STMT (var);
1211       /* Don't mark an iv peeled from another one as a biv.  */
1212 if (def
1213 && gimple_code (def) == GIMPLE_PHI
1214 && gimple_bb (def) == loop->header)
1215 continue;
1217 incr_iv = get_iv (data, var);
1218 if (!incr_iv)
1219 continue;
1221       /* If the increment is in a subloop, ignore it.  */
1222 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1223 if (incr_bb->loop_father != data->current_loop
1224 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1225 continue;
1227 iv->biv_p = true;
1228 incr_iv->biv_p = true;
1229 if (iv->no_overflow)
1230 data->bivs_not_used_in_addr++;
1231 if (incr_iv->no_overflow)
1232 data->bivs_not_used_in_addr++;
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1239 static bool
1240 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1242 tree lhs, stop;
1243 struct loop *loop = data->current_loop;
1245 iv->base = NULL_TREE;
1246 iv->step = NULL_TREE;
1248 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1249 return false;
1251 lhs = gimple_assign_lhs (stmt);
1252 if (TREE_CODE (lhs) != SSA_NAME)
1253 return false;
1255 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1256 return false;
1258   /* Stop expanding the iv base at the first ssa var referred to by the
1259      iv step.  Ideally we would stop at any ssa var, but since that is
1260      expensive and such cases are unusual, we just do it for the first one.
1262 See PR64705 for the rationale. */
1263 stop = extract_single_var_from_expr (iv->step);
1264 iv->base = expand_simple_operations (iv->base, stop);
1265 if (contains_abnormal_ssa_name_p (iv->base)
1266 || contains_abnormal_ssa_name_p (iv->step))
1267 return false;
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270      While this will suppress optimizations, we cannot safely delete this
1271 GIV and associated statements, even if it appears it is not used. */
1272 if (stmt_could_throw_p (stmt))
1273 return false;
1275 return true;
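/* For example (illustrative): given the biv i_1 with base 0 and step 1,
   the assignment "off_3 = i_1 * 4" is recognized by simple_iv as a giv
   with base 0 and step 4, provided the statement cannot throw.  */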
1278 /* Finds general ivs in statement STMT. */
1280 static void
1281 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1283 affine_iv iv;
1285 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1286 return;
1288 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1291 /* Finds general ivs in basic block BB. */
1293 static void
1294 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1296 gimple_stmt_iterator bsi;
1298 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1299 find_givs_in_stmt (data, gsi_stmt (bsi));
1302 /* Finds general ivs. */
1304 static void
1305 find_givs (struct ivopts_data *data)
1307 struct loop *loop = data->current_loop;
1308 basic_block *body = get_loop_body_in_dom_order (loop);
1309 unsigned i;
1311 for (i = 0; i < loop->num_nodes; i++)
1312 find_givs_in_bb (data, body[i]);
1313 free (body);
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1319 static bool
1320 find_induction_variables (struct ivopts_data *data)
1322 unsigned i;
1323 bitmap_iterator bi;
1325 if (!find_bivs (data))
1326 return false;
1328 find_givs (data);
1329 mark_bivs (data);
1331 if (dump_file && (dump_flags & TDF_DETAILS))
1333 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1335 if (niter)
1337 fprintf (dump_file, " number of iterations ");
1338 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1339 if (!integer_zerop (niter->may_be_zero))
1341 fprintf (dump_file, "; zero if ");
1342 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1344 fprintf (dump_file, "\n\n");
1347 fprintf (dump_file, "Induction variables:\n\n");
1349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1351 if (ver_info (data, i)->iv)
1352 dump_iv (dump_file, ver_info (data, i)->iv, true);
1356 return true;
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1364 static struct iv_use *
1365 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1366 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1367 unsigned HOST_WIDE_INT addr_offset = 0)
1369 struct iv_use *use = XCNEW (struct iv_use);
1371 use->id = n_iv_uses (data);
1372 use->sub_id = 0;
1373 use->type = use_type;
1374 use->iv = iv;
1375 use->stmt = stmt;
1376 use->op_p = use_p;
1377 use->related_cands = BITMAP_ALLOC (NULL);
1378 use->next = NULL;
1379 use->addr_base = addr_base;
1380 use->addr_offset = addr_offset;
1382 data->iv_uses.safe_push (use);
1384 return use;
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1390 static struct iv_use *
1391 record_sub_use (struct ivopts_data *data, tree *use_p,
1392 struct iv *iv, gimple *stmt, enum use_type use_type,
1393 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1394 unsigned int id_group)
1396 struct iv_use *use = XCNEW (struct iv_use);
1397 struct iv_use *group = iv_use (data, id_group);
1399 use->id = group->id;
1400 use->sub_id = 0;
1401 use->type = use_type;
1402 use->iv = iv;
1403 use->stmt = stmt;
1404 use->op_p = use_p;
1405 use->related_cands = NULL;
1406 use->addr_base = addr_base;
1407 use->addr_offset = addr_offset;
1409 /* Sub use list is maintained in offset ascending order. */
1410 if (addr_offset <= group->addr_offset)
1412 use->related_cands = group->related_cands;
1413 group->related_cands = NULL;
1414 use->next = group;
1415 data->iv_uses[id_group] = use;
1417 else
1419 struct iv_use *pre;
1422 pre = group;
1423 group = group->next;
1425 while (group && addr_offset > group->addr_offset);
1426 use->next = pre->next;
1427 pre->next = use;
1430 return use;
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1437 static void
1438 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1440 basic_block bb;
1441 struct version_info *info;
1443 if (TREE_CODE (op) != SSA_NAME
1444 || virtual_operand_p (op))
1445 return;
1447 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1448 if (bb
1449 && flow_bb_inside_loop_p (data->current_loop, bb))
1450 return;
1452 info = name_info (data, op);
1453 info->name = op;
1454 info->has_nonlin_use |= nonlinear_use;
1455 if (!info->inv_id)
1456 info->inv_id = ++data->max_inv_id;
1457 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1460 /* Checks whether the use OP is interesting and if so, records it. */
1462 static struct iv_use *
1463 find_interesting_uses_op (struct ivopts_data *data, tree op)
1465 struct iv *iv;
1466 gimple *stmt;
1467 struct iv_use *use;
1469 if (TREE_CODE (op) != SSA_NAME)
1470 return NULL;
1472 iv = get_iv (data, op);
1473 if (!iv)
1474 return NULL;
1476 if (iv->have_use_for)
1478 use = iv_use (data, iv->use_id);
1480 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1481 return use;
1484 if (integer_zerop (iv->step))
1486 record_invariant (data, op, true);
1487 return NULL;
1489 iv->have_use_for = true;
1491 stmt = SSA_NAME_DEF_STMT (op);
1492 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1493 || is_gimple_assign (stmt));
1495 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1496 iv->use_id = use->id;
1498 return use;
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1509 static bool
1510 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1511 tree **control_var, tree **bound,
1512 struct iv **iv_var, struct iv **iv_bound)
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv;
1516 static tree zero;
1517 tree *op0 = &zero, *op1 = &zero;
1518 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1519 bool ret = false;
1521 if (gimple_code (stmt) == GIMPLE_COND)
1523 gcond *cond_stmt = as_a <gcond *> (stmt);
1524 op0 = gimple_cond_lhs_ptr (cond_stmt);
1525 op1 = gimple_cond_rhs_ptr (cond_stmt);
1527 else
1529 op0 = gimple_assign_rhs1_ptr (stmt);
1530 op1 = gimple_assign_rhs2_ptr (stmt);
1533 zero = integer_zero_node;
1534 const_iv.step = integer_zero_node;
1536 if (TREE_CODE (*op0) == SSA_NAME)
1537 iv0 = get_iv (data, *op0);
1538 if (TREE_CODE (*op1) == SSA_NAME)
1539 iv1 = get_iv (data, *op1);
1541 /* Exactly one of the compared values must be an iv, and the other one must
1542 be an invariant. */
1543 if (!iv0 || !iv1)
1544 goto end;
1546 if (integer_zerop (iv0->step))
1548 /* Control variable may be on the other side. */
1549 std::swap (op0, op1);
1550 std::swap (iv0, iv1);
1552 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1554 end:
1555 if (control_var)
1556 *control_var = op0;
1557 if (iv_var)
1558 *iv_var = iv0;
1559 if (bound)
1560 *bound = op1;
1561 if (iv_bound)
1562 *iv_bound = iv1;
1564 return ret;
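/* E.g. (illustration): for "if (i_1 < n_2)" where i_1 is an iv with
   nonzero step and n_2 is loop invariant, *CONTROL_VAR points to i_1,
   *BOUND to n_2 and true is returned; for a compare of two invariants
   false is returned with the operand locations still filled in.  */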
1567 /* Checks whether the condition in STMT is interesting and if so,
1568 records it. */
1570 static void
1571 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1573 tree *var_p, *bound_p;
1574 struct iv *var_iv;
1576 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1578 find_interesting_uses_op (data, *var_p);
1579 find_interesting_uses_op (data, *bound_p);
1580 return;
1583 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1586 /* Returns the outermost loop in which EXPR is obviously invariant,
1587 relative to the loop LOOP, i.e. if all its operands are defined
1588 outside of the returned loop. Returns NULL if EXPR is not
1589 even obviously invariant in LOOP. */
1591 struct loop *
1592 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1594 basic_block def_bb;
1595 unsigned i, len;
1597 if (is_gimple_min_invariant (expr))
1598 return current_loops->tree_root;
1600 if (TREE_CODE (expr) == SSA_NAME)
1602 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1603 if (def_bb)
1605 if (flow_bb_inside_loop_p (loop, def_bb))
1606 return NULL;
1607 return superloop_at_depth (loop,
1608 loop_depth (def_bb->loop_father) + 1);
1611 return current_loops->tree_root;
1614 if (!EXPR_P (expr))
1615 return NULL;
1617 unsigned maxdepth = 0;
1618 len = TREE_OPERAND_LENGTH (expr);
1619 for (i = 0; i < len; i++)
1621 struct loop *ivloop;
1622 if (!TREE_OPERAND (expr, i))
1623 continue;
1625 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1626 if (!ivloop)
1627 return NULL;
1628 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1631 return superloop_at_depth (loop, maxdepth);
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1638 bool
1639 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1641 basic_block def_bb;
1642 unsigned i, len;
1644 gcc_assert (loop_depth (loop) > 0);
1646 if (is_gimple_min_invariant (expr))
1647 return true;
1649 if (TREE_CODE (expr) == SSA_NAME)
1651 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1652 if (def_bb
1653 && flow_bb_inside_loop_p (loop, def_bb))
1654 return false;
1656 return true;
1659 if (!EXPR_P (expr))
1660 return false;
1662 len = TREE_OPERAND_LENGTH (expr);
1663 for (i = 0; i < len; i++)
1664 if (TREE_OPERAND (expr, i)
1665 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1666 return false;
1668 return true;
1671 /* Given expression EXPR which computes inductive values with respect
1672 to loop recorded in DATA, this function returns biv from which EXPR
1673 is derived by tracing definition chains of ssa variables in EXPR. */
1675 static struct iv*
1676 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1678 struct iv *iv;
1679 unsigned i, n;
1680 tree e2, e1;
1681 enum tree_code code;
1682 gimple *stmt;
1684 if (expr == NULL_TREE)
1685 return NULL;
1687 if (is_gimple_min_invariant (expr))
1688 return NULL;
1690 code = TREE_CODE (expr);
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1693 n = TREE_OPERAND_LENGTH (expr);
1694 for (i = 0; i < n; i++)
1696 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1697 if (iv)
1698 return iv;
1702   /* Stop if it's not an ssa name.  */
1703 if (code != SSA_NAME)
1704 return NULL;
1706 iv = get_iv (data, expr);
1707 if (!iv || integer_zerop (iv->step))
1708 return NULL;
1709 else if (iv->biv_p)
1710 return iv;
1712 stmt = SSA_NAME_DEF_STMT (expr);
1713 if (gphi *phi = dyn_cast <gphi *> (stmt))
1715 ssa_op_iter iter;
1716 use_operand_p use_p;
1718 if (virtual_operand_p (gimple_phi_result (phi)))
1719 return NULL;
1721 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1723 tree use = USE_FROM_PTR (use_p);
1724 iv = find_deriving_biv_for_expr (data, use);
1725 if (iv)
1726 return iv;
1728 return NULL;
1730 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1731 return NULL;
1733 e1 = gimple_assign_rhs1 (stmt);
1734 code = gimple_assign_rhs_code (stmt);
1735 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1736 return find_deriving_biv_for_expr (data, e1);
1738 switch (code)
1740 case MULT_EXPR:
1741 case PLUS_EXPR:
1742 case MINUS_EXPR:
1743 case POINTER_PLUS_EXPR:
1744 /* Increments, decrements and multiplications by a constant
1745 are simple. */
1746 e2 = gimple_assign_rhs2 (stmt);
1747 iv = find_deriving_biv_for_expr (data, e2);
1748 if (iv)
1749 return iv;
1751 /* Fallthru. */
1752 CASE_CONVERT:
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data, e1);
1756 default:
1757 break;
1760 return NULL;
1763 /* Record that BIV, along with its predecessor and successor ivs, is used in
1764 address type uses. */
1766 static void
1767 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1769 unsigned i;
1770 tree type, base_1, base_2;
1771 bitmap_iterator bi;
1773 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1774 || biv->have_address_use || !biv->no_overflow)
1775 return;
1777 type = TREE_TYPE (biv->base);
1778 if (!INTEGRAL_TYPE_P (type))
1779 return;
1781 biv->have_address_use = true;
1782 data->bivs_not_used_in_addr--;
1783 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1784 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1786 struct iv *iv = ver_info (data, i)->iv;
1788 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1789 || iv->have_address_use || !iv->no_overflow)
1790 continue;
1792 if (type != TREE_TYPE (iv->base)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1794 continue;
1796 if (!operand_equal_p (biv->step, iv->step, 0))
1797 continue;
1799 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1800 if (operand_equal_p (base_1, iv->base, 0)
1801 || operand_equal_p (base_2, biv->base, 0))
1803 iv->have_address_use = true;
1804 data->bivs_not_used_in_addr--;
1809 /* Accumulates the steps of indices into DATA and replaces their values with the
1810 initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1813 struct ifs_ivopts_data
1815 struct ivopts_data *ivopts_data;
1816 gimple *stmt;
1817 tree step;
1820 static bool
1821 idx_find_step (tree base, tree *idx, void *data)
1823 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1824 struct iv *iv;
1825 bool use_overflow_semantics = false;
1826 tree step, iv_base, iv_step, lbound, off;
1827 struct loop *loop = dta->ivopts_data->current_loop;
1829 /* If base is a component ref, require that the offset of the reference
1830 be invariant. */
1831 if (TREE_CODE (base) == COMPONENT_REF)
1833 off = component_ref_field_offset (base);
1834 return expr_invariant_in_loop_p (loop, off);
1837 /* If base is array, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1846 return false;
1848 step = array_ref_element_size (base);
1849 lbound = array_ref_low_bound (base);
1851 if (!expr_invariant_in_loop_p (loop, step)
1852 || !expr_invariant_in_loop_p (loop, lbound))
1853 return false;
1856 if (TREE_CODE (*idx) != SSA_NAME)
1857 return true;
1859 iv = get_iv (dta->ivopts_data, *idx);
1860 if (!iv)
1861 return false;
1863 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1864 *&x[0], which is not folded and does not trigger the
1865 ARRAY_REF path below. */
1866 *idx = iv->base;
1868 if (integer_zerop (iv->step))
1869 return true;
1871 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1873 step = array_ref_element_size (base);
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step) != INTEGER_CST)
1877 return false;
1879 else
1880     /* The step for pointer arithmetic is already 1 byte.  */
1881 step = size_one_node;
1883 iv_base = iv->base;
1884 iv_step = iv->step;
1885 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1886 use_overflow_semantics = true;
1888 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1889 sizetype, &iv_base, &iv_step, dta->stmt,
1890 use_overflow_semantics))
1892 /* The index might wrap. */
1893 return false;
1896 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1897 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1899 if (dta->ivopts_data->bivs_not_used_in_addr)
1901 if (!iv->biv_p)
1902 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1904 record_biv_for_address_use (dta->ivopts_data, iv);
1906 return true;
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1912 static bool
1913 idx_record_use (tree base, tree *idx,
1914 void *vdata)
1916 struct ivopts_data *data = (struct ivopts_data *) vdata;
1917 find_interesting_uses_op (data, *idx);
1918 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1920 find_interesting_uses_op (data, array_ref_element_size (base));
1921 find_interesting_uses_op (data, array_ref_low_bound (base));
1923 return true;
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1931 static bool
1932 constant_multiple_of (tree top, tree bot, widest_int *mul)
1934 tree mby;
1935 enum tree_code code;
1936 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1937 widest_int res, p0, p1;
1939 STRIP_NOPS (top);
1940 STRIP_NOPS (bot);
1942 if (operand_equal_p (top, bot, 0))
1944 *mul = 1;
1945 return true;
1948 code = TREE_CODE (top);
1949 switch (code)
1951 case MULT_EXPR:
1952 mby = TREE_OPERAND (top, 1);
1953 if (TREE_CODE (mby) != INTEGER_CST)
1954 return false;
1956 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1957 return false;
1959 *mul = wi::sext (res * wi::to_widest (mby), precision);
1960 return true;
1962 case PLUS_EXPR:
1963 case MINUS_EXPR:
1964 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1965 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1966 return false;
1968 if (code == MINUS_EXPR)
1969 p1 = -p1;
1970 *mul = wi::sext (p0 + p1, precision);
1971 return true;
1973 case INTEGER_CST:
1974 if (TREE_CODE (bot) != INTEGER_CST)
1975 return false;
1977 p0 = widest_int::from (top, SIGNED);
1978 p1 = widest_int::from (bot, SIGNED);
1979 if (p1 == 0)
1980 return false;
1981 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1982 return res == 0;
1984 default:
1985 return false;
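/* Examples (illustrative): constant_multiple_of (x_1 * 12, x_1, &mul)
   stores 12, and constant_multiple_of (24, 8, &mul) stores 3; it fails
   for (25, 8) because the division leaves a remainder, and for
   (x_1 * 12, y_2) because no constant factor relates the two.  */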
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1991 static bool
1992 may_be_unaligned_p (tree ref, tree step)
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref) == TARGET_MEM_REF)
1997 return false;
1999 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2001 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2003 unsigned HOST_WIDE_INT bitpos;
2004 unsigned int ref_align;
2005 get_object_alignment_1 (ref, &ref_align, &bitpos);
2006 if (ref_align < align
2007 || (bitpos % align) != 0
2008 || (bitpos % BITS_PER_UNIT) != 0)
2009 return true;
2011 unsigned int trailing_zeros = tree_ctz (step);
2012 if (trailing_zeros < HOST_BITS_PER_INT
2013 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2014 return true;
2016 return false;
2019 /* Return true if EXPR may be non-addressable. */
2021 bool
2022 may_be_nonaddressable_p (tree expr)
2024 switch (TREE_CODE (expr))
2026 case TARGET_MEM_REF:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2029 return false;
2031 case MEM_REF:
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr);
2035 case BIT_FIELD_REF:
2036 if (REF_REVERSE_STORAGE_ORDER (expr))
2037 return true;
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2040 case COMPONENT_REF:
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2042 return true;
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2049 return true;
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2052 case VIEW_CONVERT_EXPR:
2053 /* This kind of view-conversions may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2059 return true;
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2062 CASE_CONVERT:
2063 return true;
2065 default:
2066 break;
2069 return false;
2072 static tree
2073 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076    If there is an existing use which has the same stripped iv base and step,
2077 this function records this one as a sub use to that; otherwise records
2078 it as a normal one. */
2080 static struct iv_use *
2081 record_group_use (struct ivopts_data *data, tree *use_p,
2082 struct iv *iv, gimple *stmt, enum use_type use_type)
2084 unsigned int i;
2085 struct iv_use *use;
2086 tree addr_base;
2087 unsigned HOST_WIDE_INT addr_offset;
2089   /* Only support sub uses for address type uses, that is, uses with a base
2090 object. */
2091 if (!iv->base_object)
2092 return record_use (data, use_p, iv, stmt, use_type);
2094 addr_base = strip_offset (iv->base, &addr_offset);
2095 for (i = 0; i < n_iv_uses (data); i++)
2097 use = iv_use (data, i);
2098 if (use->type != USE_ADDRESS || !use->iv->base_object)
2099 continue;
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2103 && operand_equal_p (iv->step, use->iv->step, 0)
2104 && operand_equal_p (addr_base, use->addr_base, 0))
2105 break;
2108 if (i == n_iv_uses (data))
2109 return record_use (data, use_p, iv, stmt,
2110 use_type, addr_base, addr_offset);
2111 else
2112 return record_sub_use (data, use_p, iv, stmt,
2113 use_type, addr_base, addr_offset, i);
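/* Grouping illustration (hypothetical): address uses *(p_1 + 4) and
   *(p_1 + 8) based on the same iv share the stripped base p_1, so the
   second is recorded as a sub use of the first, the sub use list being
   kept sorted by the constant offsets 4 and 8.  */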
2116 /* Finds addresses in *OP_P inside STMT. */
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2136 base = unshare_expr (base);
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2173 if (astep)
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2186 else
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2206 base = build_fold_addr_expr (base);
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
2234 /* Finds and records invariants used in STMT. */
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2261 find_invariants_stmt (data, stmt);
2263 if (gimple_code (stmt) == GIMPLE_COND)
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2269 if (is_gimple_assign (stmt))
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2279 iv = get_iv (data, *lhs);
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2305 /* TODO -- we should also handle address uses of type
2307 memory = call (whatever);
2311 call (memory). */
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2317 iv = get_iv (data, PHI_RESULT (stmt));
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2325 op = USE_FROM_PTR (use_p);
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2334 find_interesting_uses_op (data, op);
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2357 /* Finds uses of the induction variables that are interesting. */
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2374 edge_iterator ei;
2375 bb = body[i];
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2391 bitmap_iterator bi;
2393 fprintf (dump_file, "\n");
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2407 fprintf (dump_file, "\n");
2410 free (body);
2413 /* Compute the maximum offset of the [base + offset] addressing mode
2414 for the memory reference represented by USE. */
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2451 for (i = width; i > 0; i--)
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2469 if (i == 0)
2470 off = 0;
2472 max_offset_list[list_index] = off;
2473 return off;
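The probing strategy above, in isolation: starting from the widest plausible width, try offsets of the form 2^i - 1 from the top bit downward and keep the first one the target accepts. A self-contained sketch, with a hypothetical offset_is_valid predicate standing in for memory_address_addr_space_p:

#include <assert.h>
#include <stdbool.h>

/* Hypothetical validity test; in GCC a target hook decides this.
   Here we pretend offsets are a 13-bit signed immediate field.  */
static bool
offset_is_valid (long long off)
{
  return off >= -4096 && off <= 4095;
}

/* Largest supported positive offset, found by scanning powers of two
   from the widest candidate downward, as compute_max_addr_offset does.  */
static long long
max_supported_offset (int width)
{
  for (int i = width; i > 0; i--)
    {
      long long off = (1LL << i) - 1;
      if (offset_is_valid (off))
        return off;
    }
  return 0;
}

int
main (void)
{
  assert (max_supported_offset (62) == 4095);
  return 0;
}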
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2479 1) At least one group contains exactly two distinct offsets.
2480 2) No group contains more than two distinct offsets.
2482 Return false otherwise. We want to split such groups because:
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) A problem consisting only of small groups is usually small, and
2487 the general algorithm can handle it well.
2489 TODO -- The above claim may not hold when auto increment is supported. */
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2512 if (distinct > 2)
2513 return false;
2515 if (distinct == 2)
2516 split_p = true;
2519 return split_p;
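The distinct-offset count above walks each group's sub-use list (kept sorted by offset) and bumps a counter whenever adjacent offsets differ. The same walk over a plain sorted array, as a sketch:

#include <assert.h>

/* Number of distinct values in a sorted array of N offsets.  */
static unsigned
count_distinct (const long *off, unsigned n)
{
  unsigned distinct = n ? 1 : 0;
  for (unsigned i = 1; i < n; i++)
    if (off[i] != off[i - 1])
      distinct++;
  return distinct;
}

int
main (void)
{
  long a[] = { 0, 0, 4, 4, 4 };
  assert (count_distinct (a, 5) == 2);   /* a "small" group: split it */
  return 0;
}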
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by the target's
2524 [base + offset] addressing mode. */
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2549 while (use)
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part of the addressing mode can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2564 /* Break the list and create new group. */
2565 if (use)
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2593 STRIP_NOPS (expr);
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2599 switch (code)
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2635 return fold_convert (orig_type, expr);
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2653 return fold_convert (orig_type, expr);
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2669 if (top_compref
2670 && integer_zerop (op1))
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2678 break;
2680 case COMPONENT_REF:
2682 tree field;
2684 if (!inside_addr)
2685 return orig_expr;
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2693 HOST_WIDE_INT boffset, abs_off;
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2707 break;
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2725 default:
2726 return orig_expr;
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2744 /* Inside an address, we might strip top-level component references,
2745 thus changing the type of the expression. Handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2749 return expr;
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
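As a worked example of the decomposition strip_offset performs, consider the address &s.data[i + 3] for the assumed struct below: the core varies with i, while the field offset and the constant part of the index fold into OFFSET. The identity being relied on can be checked directly:

#include <assert.h>
#include <stddef.h>

struct S { long pad; int data[16]; };

int
main (void)
{
  struct S s;
  long i = 5;
  char *full = (char *) &s.data[i + 3];
  /* strip_offset-style split: a variable core plus a constant offset.  */
  char *core = (char *) &s + i * (long) sizeof (int);
  ptrdiff_t offset = offsetof (struct S, data) + 3 * sizeof (int);
  assert (full == core + offset);
  return 0;
}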
2763 /* Returns a variant of TYPE that can be used as a base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
2767 static tree
2768 generic_type_for (tree type)
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2776 return unsigned_type_for (type);
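The reason unsigned types are preferred here: signed overflow is undefined behavior in C, so an induction variable recomputed in a signed type could wrap unpredictably, while unsigned arithmetic wraps modulo 2^N by definition. The guaranteed behavior being relied on:

#include <assert.h>
#include <limits.h>

int
main (void)
{
  unsigned int u = UINT_MAX;
  u += 1;               /* well defined: wraps to 0 */
  assert (u == 0);
  /* The signed counterpart, INT_MAX + 1, would be undefined.  */
  return 0;
}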
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2800 return NULL_TREE;
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2818 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2819 live, but the ivopts code may replace a real pointer with one
2820 pointing before or after the memory block that is then adjusted
2821 into the memory block during the loop. FIXME: It would likely be
2822 better to actually force the pointer live and still use ivopts;
2823 for example, it would be enough to write the pointer into memory
2824 and keep it there until after the loop. */
2825 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2826 return NULL;
2828 /* For non-original variables, make sure their values are computed in a type
2829 that does not invoke undefined behavior on overflows (since in general,
2830 we cannot prove that these induction variables are non-wrapping). */
2831 if (pos != IP_ORIGINAL)
2833 orig_type = TREE_TYPE (base);
2834 type = generic_type_for (orig_type);
2835 if (type != orig_type)
2837 base = fold_convert (type, base);
2838 step = fold_convert (type, step);
2842 for (i = 0; i < n_iv_cands (data); i++)
2844 cand = iv_cand (data, i);
2846 if (cand->pos != pos)
2847 continue;
2849 if (cand->incremented_at != incremented_at
2850 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2851 && cand->ainc_use != use))
2852 continue;
2854 if (!cand->iv)
2856 if (!base && !step)
2857 break;
2859 continue;
2862 if (!base && !step)
2863 continue;
2865 if (operand_equal_p (base, cand->iv->base, 0)
2866 && operand_equal_p (step, cand->iv->step, 0)
2867 && (TYPE_PRECISION (TREE_TYPE (base))
2868 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2869 break;
2872 if (i == n_iv_cands (data))
2874 cand = XCNEW (struct iv_cand);
2875 cand->id = i;
2877 if (!base && !step)
2878 cand->iv = NULL;
2879 else
2880 cand->iv = alloc_iv (data, base, step);
2882 cand->pos = pos;
2883 if (pos != IP_ORIGINAL && cand->iv)
2885 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2886 cand->var_after = cand->var_before;
2888 cand->important = important;
2889 cand->incremented_at = incremented_at;
2890 data->iv_candidates.safe_push (cand);
2892 if (step
2893 && TREE_CODE (step) != INTEGER_CST)
2895 fd_ivopts_data = data;
2896 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2899 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2900 cand->ainc_use = use;
2901 else
2902 cand->ainc_use = NULL;
2904 cand->orig_iv = orig_iv;
2905 if (dump_file && (dump_flags & TDF_DETAILS))
2906 dump_cand (dump_file, cand);
2909 if (important && !cand->important)
2911 cand->important = true;
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2916 if (use)
2918 bitmap_set_bit (use->related_cands, i);
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2920 fprintf (dump_file, "Candidate %d is related to use %d\n",
2921 cand->id, use->id);
2924 return cand;
2927 /* Returns true if incrementing the induction variable at the end of the LOOP
2928 is allowed.
2930 The purpose is to avoid splitting latch edge with a biv increment, thus
2931 creating a jump, possibly confusing other optimization passes and leaving
2932 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2933 is not available (so we do not have a better alternative), or if the latch
2934 edge is already nonempty. */
2936 static bool
2937 allow_ip_end_pos_p (struct loop *loop)
2939 if (!ip_normal_pos (loop))
2940 return true;
2942 if (!empty_block_p (ip_end_pos (loop)))
2943 return true;
2945 return false;
2948 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2949 Important field is set to IMPORTANT. */
2951 static void
2952 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2953 bool important, struct iv_use *use)
2955 basic_block use_bb = gimple_bb (use->stmt);
2956 machine_mode mem_mode;
2957 unsigned HOST_WIDE_INT cstepi;
2959 /* If we insert the increment in any position other than the standard
2960 ones, we must ensure that the increment is executed exactly once per
2961 iteration. It must not be in an inner nested loop, or on one side of
2962 an if statement. */
2963 if (use_bb->loop_father != data->current_loop
2964 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2965 || stmt_could_throw_p (use->stmt)
2966 || !cst_and_fits_in_hwi (step))
2967 return;
2969 cstepi = int_cst_value (step);
2971 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2972 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2973 || USE_STORE_PRE_INCREMENT (mem_mode))
2974 && GET_MODE_SIZE (mem_mode) == cstepi)
2975 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2976 || USE_STORE_PRE_DECREMENT (mem_mode))
2977 && GET_MODE_SIZE (mem_mode) == -cstepi))
2979 enum tree_code code = MINUS_EXPR;
2980 tree new_base;
2981 tree new_step = step;
2983 if (POINTER_TYPE_P (TREE_TYPE (base)))
2985 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2986 code = POINTER_PLUS_EXPR;
2988 else
2989 new_step = fold_convert (TREE_TYPE (base), new_step);
2990 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2991 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2992 use->stmt);
2994 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2995 || USE_STORE_POST_INCREMENT (mem_mode))
2996 && GET_MODE_SIZE (mem_mode) == cstepi)
2997 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2998 || USE_STORE_POST_DECREMENT (mem_mode))
2999 && GET_MODE_SIZE (mem_mode) == -cstepi))
3001 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3002 use->stmt);
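The shape of loop these candidates pay off on, as an assumed example: on a target with post-increment addressing, the store and the pointer bump below can become one instruction, which is what the IP_AFTER_USE candidate models (IP_BEFORE_USE models the pre-increment form, with the base shifted back by one step as above).

void
zero_bytes (char *p, char *end)
{
  while (p < end)
    *p++ = 0;   /* store plus increment: a post-increment access */
}

int
main (void)
{
  char buf[8];
  zero_bytes (buf, buf + sizeof buf);
  return buf[0];
}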
3006 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3007 position to POS. If USE is not NULL, the candidate is set as related to
3008 it. The candidate computation is scheduled before the exit condition and
3009 at the end of the loop. */
3011 static void
3012 add_candidate (struct ivopts_data *data,
3013 tree base, tree step, bool important, struct iv_use *use,
3014 struct iv *orig_iv = NULL)
3016 gcc_assert (use == NULL || use->sub_id == 0);
3018 if (ip_normal_pos (data->current_loop))
3019 add_candidate_1 (data, base, step, important,
3020 IP_NORMAL, use, NULL, orig_iv);
3021 if (ip_end_pos (data->current_loop)
3022 && allow_ip_end_pos_p (data->current_loop))
3023 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3026 /* Adds standard iv candidates. */
3028 static void
3029 add_standard_iv_candidates (struct ivopts_data *data)
3031 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3033 /* The same for a double-integer type if it is still fast enough. */
3034 if (TYPE_PRECISION
3035 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3036 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3037 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3038 build_int_cst (long_integer_type_node, 1), true, NULL);
3040 /* The same for a double-integer type if it is still fast enough. */
3041 if (TYPE_PRECISION
3042 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3043 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3044 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3045 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3049 /* Adds candidates based on the old induction variable IV. */
3051 static void
3052 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3054 gimple *phi;
3055 tree def;
3056 struct iv_cand *cand;
3058 /* Check if this biv is used in address type use. */
3059 if (iv->no_overflow && iv->have_address_use
3060 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3061 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3063 tree base = fold_convert (sizetype, iv->base);
3064 tree step = fold_convert (sizetype, iv->step);
3066 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3067 add_candidate (data, base, step, true, NULL, iv);
3068 /* Add iv cand of the original type only if it has nonlinear use. */
3069 if (iv->have_use_for)
3070 add_candidate (data, iv->base, iv->step, true, NULL);
3072 else
3073 add_candidate (data, iv->base, iv->step, true, NULL);
3075 /* The same, but with initial value zero. */
3076 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3077 add_candidate (data, size_int (0), iv->step, true, NULL);
3078 else
3079 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3080 iv->step, true, NULL);
3082 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3083 if (gimple_code (phi) == GIMPLE_PHI)
3085 /* Additionally record the possibility of leaving the original iv
3086 untouched. */
3087 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3088 /* Don't add the candidate if it comes from another PHI node, because
3089 then it is an affine iv appearing in the form of a PEELED_CHREC. */
3090 phi = SSA_NAME_DEF_STMT (def);
3091 if (gimple_code (phi) != GIMPLE_PHI)
3093 cand = add_candidate_1 (data,
3094 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3095 SSA_NAME_DEF_STMT (def));
3096 if (cand)
3098 cand->var_before = iv->ssa_name;
3099 cand->var_after = def;
3102 else
3103 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3107 /* Adds candidates based on the old induction variables. */
3109 static void
3110 add_iv_candidate_for_bivs (struct ivopts_data *data)
3112 unsigned i;
3113 struct iv *iv;
3114 bitmap_iterator bi;
3116 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3118 iv = ver_info (data, i)->iv;
3119 if (iv && iv->biv_p && !integer_zerop (iv->step))
3120 add_iv_candidate_for_biv (data, iv);
3124 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3126 static void
3127 record_common_cand (struct ivopts_data *data, tree base,
3128 tree step, struct iv_use *use)
3130 struct iv_common_cand ent;
3131 struct iv_common_cand **slot;
3133 gcc_assert (use != NULL);
3135 ent.base = base;
3136 ent.step = step;
3137 ent.hash = iterative_hash_expr (base, 0);
3138 ent.hash = iterative_hash_expr (step, ent.hash);
3140 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3141 if (*slot == NULL)
3143 *slot = new iv_common_cand ();
3144 (*slot)->base = base;
3145 (*slot)->step = step;
3146 (*slot)->uses.create (8);
3147 (*slot)->hash = ent.hash;
3148 data->iv_common_cands.safe_push ((*slot));
3150 (*slot)->uses.safe_push (use);
3151 return;
3154 /* Comparison function used to sort common candidates. */
3156 static int
3157 common_cand_cmp (const void *p1, const void *p2)
3159 unsigned n1, n2;
3160 const struct iv_common_cand *const *const ccand1
3161 = (const struct iv_common_cand *const *)p1;
3162 const struct iv_common_cand *const *const ccand2
3163 = (const struct iv_common_cand *const *)p2;
3165 n1 = (*ccand1)->uses.length ();
3166 n2 = (*ccand2)->uses.length ();
3167 return n2 - n1;
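A minimal qsort harness for the same comparator shape (note that the comparator subtracts unsigned use counts, which only yields the right sign while counts stay far below INT_MAX; safe here, since they count uses within one loop):

#include <assert.h>
#include <stdlib.h>

struct cc { unsigned uses; };

/* Descending by use count, mirroring common_cand_cmp.  */
static int
cc_cmp (const void *p1, const void *p2)
{
  const struct cc *const *a = (const struct cc *const *) p1;
  const struct cc *const *b = (const struct cc *const *) p2;
  return (int) ((*b)->uses - (*a)->uses);
}

int
main (void)
{
  struct cc x = { 2 }, y = { 5 }, z = { 1 };
  const struct cc *v[] = { &x, &y, &z };
  qsort (v, 3, sizeof *v, cc_cmp);
  assert (v[0]->uses == 5 && v[1]->uses == 2 && v[2]->uses == 1);
  return 0;
}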
3170 /* Adds IV candidates based on the common candidates recorded. */
3172 static void
3173 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3175 unsigned i, j;
3176 struct iv_cand *cand_1, *cand_2;
3178 data->iv_common_cands.qsort (common_cand_cmp);
3179 for (i = 0; i < data->iv_common_cands.length (); i++)
3181 struct iv_common_cand *ptr = data->iv_common_cands[i];
3183 /* Only add IV candidate if it's derived from multiple uses. */
3184 if (ptr->uses.length () <= 1)
3185 break;
3187 cand_1 = NULL;
3188 cand_2 = NULL;
3189 if (ip_normal_pos (data->current_loop))
3190 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3191 false, IP_NORMAL, NULL, NULL);
3193 if (ip_end_pos (data->current_loop)
3194 && allow_ip_end_pos_p (data->current_loop))
3195 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3196 false, IP_END, NULL, NULL);
3198 /* Bind deriving uses and the new candidates. */
3199 for (j = 0; j < ptr->uses.length (); j++)
3201 struct iv_use *use = ptr->uses[j];
3202 if (cand_1)
3203 bitmap_set_bit (use->related_cands, cand_1->id);
3204 if (cand_2)
3205 bitmap_set_bit (use->related_cands, cand_2->id);
3209 /* Release data since it is useless from this point. */
3210 data->iv_common_cand_tab->empty ();
3211 data->iv_common_cands.truncate (0);
3214 /* Adds candidates based on the value of USE's iv. */
3216 static void
3217 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3219 unsigned HOST_WIDE_INT offset;
3220 tree base;
3221 tree basetype;
3222 struct iv *iv = use->iv;
3224 add_candidate (data, iv->base, iv->step, false, use);
3226 /* Record common candidate for use in case it can be shared by others. */
3227 record_common_cand (data, iv->base, iv->step, use);
3229 /* Record common candidate with initial value zero. */
3230 basetype = TREE_TYPE (iv->base);
3231 if (POINTER_TYPE_P (basetype))
3232 basetype = sizetype;
3233 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3235 /* Record common candidate with constant offset stripped in base. */
3237 base = strip_offset (iv->base, &offset);
3238 if (offset || base != iv->base)
3239 record_common_cand (data, base, iv->step, use);
3242 /* Record common candidate with base_object removed in base. */
3243 if (iv->base_object != NULL)
3245 unsigned i;
3246 aff_tree aff_base;
3247 tree step, base_object = iv->base_object;
3249 base = iv->base;
3250 step = iv->step;
3251 STRIP_NOPS (base);
3252 STRIP_NOPS (step);
3253 STRIP_NOPS (base_object);
3254 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3255 for (i = 0; i < aff_base.n; i++)
3257 if (aff_base.elts[i].coef != 1)
3258 continue;
3260 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3261 break;
3263 if (i < aff_base.n)
3265 aff_combination_remove_elt (&aff_base, i);
3266 base = aff_combination_to_tree (&aff_base);
3267 basetype = TREE_TYPE (base);
3268 if (POINTER_TYPE_P (basetype))
3269 basetype = sizetype;
3271 step = fold_convert (basetype, step);
3272 record_common_cand (data, base, step, use);
3273 /* Also record common candidate with offset stripped. */
3274 base = strip_offset (base, &offset);
3275 if (offset)
3276 record_common_cand (data, base, step, use);
3280 /* Finally, add auto-increment candidates. Make such variables
3281 important, since other iv uses with the same base object may be
3282 based on them. */
3283 if (use != NULL && use->type == USE_ADDRESS)
3284 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3287 /* Adds candidates based on the uses. */
3289 static void
3290 add_iv_candidate_for_uses (struct ivopts_data *data)
3292 unsigned i;
3294 for (i = 0; i < n_iv_uses (data); i++)
3296 struct iv_use *use = iv_use (data, i);
3298 if (!use)
3299 continue;
3301 switch (use->type)
3303 case USE_NONLINEAR_EXPR:
3304 case USE_COMPARE:
3305 case USE_ADDRESS:
3306 /* Just add the ivs based on the value of the iv used here. */
3307 add_iv_candidate_for_use (data, use);
3308 break;
3310 default:
3311 gcc_unreachable ();
3314 add_iv_candidate_derived_from_uses (data);
3317 /* Record important candidates and add them to related_cands bitmaps. */
3319 static void
3320 record_important_candidates (struct ivopts_data *data)
3322 unsigned i;
3323 struct iv_use *use;
3325 for (i = 0; i < n_iv_cands (data); i++)
3327 struct iv_cand *cand = iv_cand (data, i);
3329 if (cand->important)
3330 bitmap_set_bit (data->important_candidates, i);
3333 data->consider_all_candidates = (n_iv_cands (data)
3334 <= CONSIDER_ALL_CANDIDATES_BOUND);
3336 /* Add important candidates to uses' related_cands bitmaps. */
3337 for (i = 0; i < n_iv_uses (data); i++)
3339 use = iv_use (data, i);
3340 bitmap_ior_into (use->related_cands, data->important_candidates);
3344 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3345 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3346 we allocate a simple list for every use. */
3348 static void
3349 alloc_use_cost_map (struct ivopts_data *data)
3351 unsigned i, size, s;
3353 for (i = 0; i < n_iv_uses (data); i++)
3355 struct iv_use *use = iv_use (data, i);
3357 if (data->consider_all_candidates)
3358 size = n_iv_cands (data);
3359 else
3361 s = bitmap_count_bits (use->related_cands);
3363 /* Round up to a power of two, so that computing the modulus is fast. */
3364 size = s ? (1 << ceil_log2 (s)) : 1;
3367 use->n_map_members = size;
3368 use->cost_map = XCNEWVEC (struct cost_pair, size);
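Rounding the table size up to a power of two lets the lookup code below replace the division-based `id % size' with the single-and `id & (size - 1)'. The identity, sketched with a hypothetical ceil_log2:

#include <assert.h>

/* Hypothetical ceil_log2 for small positive values.  */
static unsigned
my_ceil_log2 (unsigned x)
{
  unsigned n = 0;
  while ((1u << n) < x)
    n++;
  return n;
}

int
main (void)
{
  unsigned s = 5;                          /* e.g. 5 related candidates */
  unsigned size = 1u << my_ceil_log2 (s);  /* rounded up to 8 */
  for (unsigned id = 0; id < 1000; id++)
    assert ((id & (size - 1)) == id % size);
  return 0;
}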
3372 /* Returns a description of the computation cost of an expression whose
3373 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
3375 static comp_cost
3376 new_cost (unsigned runtime, unsigned complexity)
3378 comp_cost cost;
3380 cost.cost = runtime;
3381 cost.complexity = complexity;
3383 return cost;
3386 /* Returns true if COST is infinite. */
3388 static bool
3389 infinite_cost_p (comp_cost cost)
3391 return cost.cost == INFTY;
3394 /* Adds costs COST1 and COST2. */
3396 static comp_cost
3397 add_costs (comp_cost cost1, comp_cost cost2)
3399 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3400 return infinite_cost;
3402 cost1.cost += cost2.cost;
3403 cost1.complexity += cost2.complexity;
3405 return cost1;
3407 /* Subtracts cost COST2 from COST1. */
3409 static comp_cost
3410 sub_costs (comp_cost cost1, comp_cost cost2)
3412 cost1.cost -= cost2.cost;
3413 cost1.complexity -= cost2.complexity;
3415 return cost1;
3418 /* Returns a negative number if COST1 < COST2, a positive number if
3419 COST1 > COST2, and 0 if COST1 = COST2. */
3421 static int
3422 compare_costs (comp_cost cost1, comp_cost cost2)
3424 if (cost1.cost == cost2.cost)
3425 return cost1.complexity - cost2.complexity;
3427 return cost1.cost - cost2.cost;
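A miniature of the comp_cost discipline these helpers implement: addition saturates at infinity, and comparison is lexicographic, with runtime dominating and complexity only breaking ties (a sketch with an assumed INT_MAX sentinel standing in for INFTY):

#include <assert.h>
#include <limits.h>

#define MY_INFTY INT_MAX

struct cost { int runtime; int complexity; };

static struct cost
cost_add (struct cost a, struct cost b)
{
  if (a.runtime == MY_INFTY || b.runtime == MY_INFTY)
    return (struct cost) { MY_INFTY, 0 };     /* saturate, as add_costs does */
  return (struct cost) { a.runtime + b.runtime,
                         a.complexity + b.complexity };
}

static int
cost_cmp (struct cost a, struct cost b)
{
  if (a.runtime != b.runtime)
    return a.runtime - b.runtime;
  return a.complexity - b.complexity;         /* tie-breaker only */
}

int
main (void)
{
  struct cost c1 = { 4, 1 }, c2 = { 4, 2 }, inf = { MY_INFTY, 0 };
  assert (cost_cmp (c1, c2) < 0);
  assert (cost_add (c1, inf).runtime == MY_INFTY);
  return 0;
}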
3430 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
3431 depends on invariants DEPENDS_ON, that the value used in expressing it
3432 is VALUE, and that, in case of iv elimination, the comparison operator is COMP. */
3434 static void
3435 set_use_iv_cost (struct ivopts_data *data,
3436 struct iv_use *use, struct iv_cand *cand,
3437 comp_cost cost, bitmap depends_on, tree value,
3438 enum tree_code comp, int inv_expr_id)
3440 unsigned i, s;
3442 if (infinite_cost_p (cost))
3444 BITMAP_FREE (depends_on);
3445 return;
3448 if (data->consider_all_candidates)
3450 use->cost_map[cand->id].cand = cand;
3451 use->cost_map[cand->id].cost = cost;
3452 use->cost_map[cand->id].depends_on = depends_on;
3453 use->cost_map[cand->id].value = value;
3454 use->cost_map[cand->id].comp = comp;
3455 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3456 return;
3459 /* n_map_members is a power of two, so this computes modulo. */
3460 s = cand->id & (use->n_map_members - 1);
3461 for (i = s; i < use->n_map_members; i++)
3462 if (!use->cost_map[i].cand)
3463 goto found;
3464 for (i = 0; i < s; i++)
3465 if (!use->cost_map[i].cand)
3466 goto found;
3468 gcc_unreachable ();
3470 found:
3471 use->cost_map[i].cand = cand;
3472 use->cost_map[i].cost = cost;
3473 use->cost_map[i].depends_on = depends_on;
3474 use->cost_map[i].value = value;
3475 use->cost_map[i].comp = comp;
3476 use->cost_map[i].inv_expr_id = inv_expr_id;
3479 /* Gets cost of (USE, CANDIDATE) pair. */
3481 static struct cost_pair *
3482 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3483 struct iv_cand *cand)
3485 unsigned i, s;
3486 struct cost_pair *ret;
3488 if (!cand)
3489 return NULL;
3491 if (data->consider_all_candidates)
3493 ret = use->cost_map + cand->id;
3494 if (!ret->cand)
3495 return NULL;
3497 return ret;
3500 /* n_map_members is a power of two, so this computes modulo. */
3501 s = cand->id & (use->n_map_members - 1);
3502 for (i = s; i < use->n_map_members; i++)
3503 if (use->cost_map[i].cand == cand)
3504 return use->cost_map + i;
3505 else if (use->cost_map[i].cand == NULL)
3506 return NULL;
3507 for (i = 0; i < s; i++)
3508 if (use->cost_map[i].cand == cand)
3509 return use->cost_map + i;
3510 else if (use->cost_map[i].cand == NULL)
3511 return NULL;
3513 return NULL;
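Both functions above implement a tiny open-addressing table: hash the candidate id with `id & (size - 1)', probe forward linearly, wrap around once, and stop at the first empty slot. The same probing in miniature (key 0 doubles as the empty marker here, an assumption the real code avoids by testing the cand pointer):

#include <assert.h>
#include <stddef.h>

#define TABLE_SIZE 8                 /* must be a power of two */

struct entry { int key; int value; };

static struct entry *
probe (struct entry *tab, int key)
{
  size_t start = (size_t) key & (TABLE_SIZE - 1);
  for (size_t i = start; i < TABLE_SIZE; i++)
    if (tab[i].key == key || tab[i].key == 0)
      return &tab[i];
  for (size_t i = 0; i < start; i++)
    if (tab[i].key == key || tab[i].key == 0)
      return &tab[i];
  return NULL;                       /* full; ivopts treats this as unreachable */
}

int
main (void)
{
  struct entry tab[TABLE_SIZE] = { { 0, 0 } };
  struct entry *slot = probe (tab, 13);
  slot->key = 13;
  slot->value = 42;
  assert (probe (tab, 13)->value == 42);
  return 0;
}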
3516 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3517 static rtx
3518 produce_memory_decl_rtl (tree obj, int *regno)
3520 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3521 machine_mode address_mode = targetm.addr_space.address_mode (as);
3522 rtx x;
3524 gcc_assert (obj);
3525 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3527 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3528 x = gen_rtx_SYMBOL_REF (address_mode, name);
3529 SET_SYMBOL_REF_DECL (x, obj);
3530 x = gen_rtx_MEM (DECL_MODE (obj), x);
3531 set_mem_addr_space (x, as);
3532 targetm.encode_section_info (obj, x, true);
3534 else
3536 x = gen_raw_REG (address_mode, (*regno)++);
3537 x = gen_rtx_MEM (DECL_MODE (obj), x);
3538 set_mem_addr_space (x, as);
3541 return x;
3544 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3545 walk_tree. DATA contains the actual fake register number. */
3547 static tree
3548 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3550 tree obj = NULL_TREE;
3551 rtx x = NULL_RTX;
3552 int *regno = (int *) data;
3554 switch (TREE_CODE (*expr_p))
3556 case ADDR_EXPR:
3557 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3558 handled_component_p (*expr_p);
3559 expr_p = &TREE_OPERAND (*expr_p, 0))
3560 continue;
3561 obj = *expr_p;
3562 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3563 x = produce_memory_decl_rtl (obj, regno);
3564 break;
3566 case SSA_NAME:
3567 *ws = 0;
3568 obj = SSA_NAME_VAR (*expr_p);
3569 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3570 if (!obj)
3571 return NULL_TREE;
3572 if (!DECL_RTL_SET_P (obj))
3573 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3574 break;
3576 case VAR_DECL:
3577 case PARM_DECL:
3578 case RESULT_DECL:
3579 *ws = 0;
3580 obj = *expr_p;
3582 if (DECL_RTL_SET_P (obj))
3583 break;
3585 if (DECL_MODE (obj) == BLKmode)
3586 x = produce_memory_decl_rtl (obj, regno);
3587 else
3588 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3590 break;
3592 default:
3593 break;
3596 if (x)
3598 decl_rtl_to_reset.safe_push (obj);
3599 SET_DECL_RTL (obj, x);
3602 return NULL_TREE;
3605 /* Determines cost of the computation of EXPR. */
3607 static unsigned
3608 computation_cost (tree expr, bool speed)
3610 rtx_insn *seq;
3611 rtx rslt;
3612 tree type = TREE_TYPE (expr);
3613 unsigned cost;
3614 /* Avoid using hard regs in ways which may be unsupported. */
3615 int regno = LAST_VIRTUAL_REGISTER + 1;
3616 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3617 enum node_frequency real_frequency = node->frequency;
3619 node->frequency = NODE_FREQUENCY_NORMAL;
3620 crtl->maybe_hot_insn_p = speed;
3621 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3622 start_sequence ();
3623 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3624 seq = get_insns ();
3625 end_sequence ();
3626 default_rtl_profile ();
3627 node->frequency = real_frequency;
3629 cost = seq_cost (seq, speed);
3630 if (MEM_P (rslt))
3631 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3632 TYPE_ADDR_SPACE (type), speed);
3633 else if (!REG_P (rslt))
3634 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3636 return cost;
3639 /* Returns variable containing the value of candidate CAND at statement AT. */
3641 static tree
3642 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3644 if (stmt_after_increment (loop, cand, stmt))
3645 return cand->var_after;
3646 else
3647 return cand->var_before;
3650 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3651 same precision, at least as wide as the precision of TYPE, this stores
3652 BA to A and BB to B, and returns the type of BA. Otherwise, it returns
3653 the type of A. */
3655 static tree
3656 determine_common_wider_type (tree *a, tree *b)
3658 tree wider_type = NULL;
3659 tree suba, subb;
3660 tree atype = TREE_TYPE (*a);
3662 if (CONVERT_EXPR_P (*a))
3664 suba = TREE_OPERAND (*a, 0);
3665 wider_type = TREE_TYPE (suba);
3666 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3667 return atype;
3669 else
3670 return atype;
3672 if (CONVERT_EXPR_P (*b))
3674 subb = TREE_OPERAND (*b, 0);
3675 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3676 return atype;
3678 else
3679 return atype;
3681 *a = suba;
3682 *b = subb;
3683 return wider_type;
3686 /* Determines the expression by which USE is expressed from induction variable
3687 CAND at statement AT in LOOP. The expression is stored in a decomposed
3688 form into AFF. Returns false if USE cannot be expressed using CAND. */
3690 static bool
3691 get_computation_aff (struct loop *loop,
3692 struct iv_use *use, struct iv_cand *cand, gimple *at,
3693 struct aff_tree *aff)
3695 tree ubase = use->iv->base;
3696 tree ustep = use->iv->step;
3697 tree cbase = cand->iv->base;
3698 tree cstep = cand->iv->step, cstep_common;
3699 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3700 tree common_type, var;
3701 tree uutype;
3702 aff_tree cbase_aff, var_aff;
3703 widest_int rat;
3705 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3707 /* We do not have a precision to express the values of use. */
3708 return false;
3711 var = var_at_stmt (loop, cand, at);
3712 uutype = unsigned_type_for (utype);
3714 /* If the conversion is not noop, perform it. */
3715 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3717 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3718 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3720 tree inner_base, inner_step, inner_type;
3721 inner_base = TREE_OPERAND (cbase, 0);
3722 if (CONVERT_EXPR_P (cstep))
3723 inner_step = TREE_OPERAND (cstep, 0);
3724 else
3725 inner_step = cstep;
3727 inner_type = TREE_TYPE (inner_base);
3728 /* If the candidate is added from a biv whose type is smaller than
3729 ctype, we know that neither the candidate nor the biv overflows.
3730 In this case, it is safe to skip the conversion in the candidate.
3731 As an example, (unsigned short)((unsigned long)A) equals
3732 (unsigned short)A, if A has a type no larger than short. */
3733 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3735 cbase = inner_base;
3736 cstep = inner_step;
3739 cstep = fold_convert (uutype, cstep);
3740 cbase = fold_convert (uutype, cbase);
3741 var = fold_convert (uutype, var);
3744 /* Ratio is 1 when computing the value of biv cand by itself.
3745 We can't rely on constant_multiple_of in this case because the
3746 use is created after the original biv is selected. The call
3747 could fail because of inconsistent fold behavior. See PR68021
3748 for more information. */
3749 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3751 gcc_assert (is_gimple_assign (use->stmt));
3752 gcc_assert (use->iv->ssa_name == cand->var_after);
3753 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3754 rat = 1;
3756 else if (!constant_multiple_of (ustep, cstep, &rat))
3757 return false;
3759 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3760 type, we achieve better folding by computing their difference in this
3761 wider type, and casting the result to UUTYPE. We do not need to worry
3762 about overflows, as all the arithmetic will in the end be performed in
3763 UUTYPE anyway. */
3764 common_type = determine_common_wider_type (&ubase, &cbase);
3766 /* use = ubase - ratio * cbase + ratio * var. */
3767 tree_to_aff_combination (ubase, common_type, aff);
3768 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3769 tree_to_aff_combination (var, uutype, &var_aff);
3771 /* We need to shift the value if we are after the increment. */
3772 if (stmt_after_increment (loop, cand, at))
3774 aff_tree cstep_aff;
3776 if (common_type != uutype)
3777 cstep_common = fold_convert (common_type, cstep);
3778 else
3779 cstep_common = cstep;
3781 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3782 aff_combination_add (&cbase_aff, &cstep_aff);
3785 aff_combination_scale (&cbase_aff, -rat);
3786 aff_combination_add (aff, &cbase_aff);
3787 if (common_type != uutype)
3788 aff_combination_convert (aff, uutype);
3790 aff_combination_scale (&var_aff, rat);
3791 aff_combination_add (aff, &var_aff);
3793 return true;
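The heart of the rewrite is the affine identity use = ubase - ratio * cbase + ratio * var: if the candidate's variable holds cbase + i * cstep, the use's iv is ubase + i * ustep, and ustep = ratio * cstep, the identity reproduces the use's value for every i. A direct numeric check with assumed toy values:

#include <assert.h>

int
main (void)
{
  long ubase = 100, ustep = 8;    /* use iv:  ubase + i * ustep */
  long cbase = 3, cstep = 4;      /* cand iv: cbase + i * cstep */
  long ratio = ustep / cstep;     /* what constant_multiple_of finds */
  for (long i = 0; i < 16; i++)
    {
      long var = cbase + i * cstep;
      long use = ubase + i * ustep;
      assert (use == ubase - ratio * cbase + ratio * var);
    }
  return 0;
}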
3796 /* Return the type of USE. */
3798 static tree
3799 get_use_type (struct iv_use *use)
3801 tree base_type = TREE_TYPE (use->iv->base);
3802 tree type;
3804 if (use->type == USE_ADDRESS)
3806 /* The base_type may be a void pointer. Create a pointer type based on
3807 the mem_ref instead. */
3808 type = build_pointer_type (TREE_TYPE (*use->op_p));
3809 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3810 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3812 else
3813 type = base_type;
3815 return type;
3818 /* Determines the expression by which USE is expressed from induction variable
3819 CAND at statement AT in LOOP. The computation is unshared. */
3821 static tree
3822 get_computation_at (struct loop *loop,
3823 struct iv_use *use, struct iv_cand *cand, gimple *at)
3825 aff_tree aff;
3826 tree type = get_use_type (use);
3828 if (!get_computation_aff (loop, use, cand, at, &aff))
3829 return NULL_TREE;
3830 unshare_aff_combination (&aff);
3831 return fold_convert (type, aff_combination_to_tree (&aff));
3834 /* Determines the expression by which USE is expressed from induction variable
3835 CAND in LOOP. The computation is unshared. */
3837 static tree
3838 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3840 return get_computation_at (loop, use, cand, use->stmt);
3843 /* Adjust the cost COST for being in loop setup rather than loop body.
3844 If we're optimizing for space, the loop setup overhead is constant;
3845 if we're optimizing for speed, amortize it over the per-iteration cost. */
3846 static unsigned
3847 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3849 if (cost == INFTY)
3850 return cost;
3851 else if (optimize_loop_for_speed_p (data->current_loop))
3852 return cost / avg_loop_niter (data->current_loop);
3853 else
3854 return cost;
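A quick numeric check of the amortization: with a setup cost of 20 and an average trip count of 10, the speed-mode charge is 20 / 10 = 2 per iteration, while the size-mode charge stays 20.

#include <assert.h>

/* Mirrors adjust_setup_cost: spread COST over the average iteration
   count when optimizing for speed, keep it whole when for size.  */
static unsigned
amortize (unsigned cost, unsigned avg_niter, int for_speed)
{
  return for_speed ? cost / avg_niter : cost;
}

int
main (void)
{
  assert (amortize (20, 10, 1) == 2);
  assert (amortize (20, 10, 0) == 20);
  return 0;
}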
3857 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3858 validity for a memory reference accessing memory of mode MODE in
3859 address space AS. */
3862 bool
3863 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3864 addr_space_t as)
3866 #define MAX_RATIO 128
3867 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3868 static vec<sbitmap> valid_mult_list;
3869 sbitmap valid_mult;
3871 if (data_index >= valid_mult_list.length ())
3872 valid_mult_list.safe_grow_cleared (data_index + 1);
3874 valid_mult = valid_mult_list[data_index];
3875 if (!valid_mult)
3877 machine_mode address_mode = targetm.addr_space.address_mode (as);
3878 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3879 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3880 rtx addr, scaled;
3881 HOST_WIDE_INT i;
3883 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3884 bitmap_clear (valid_mult);
3885 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3886 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3887 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3889 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3890 if (memory_address_addr_space_p (mode, addr, as)
3891 || memory_address_addr_space_p (mode, scaled, as))
3892 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3895 if (dump_file && (dump_flags & TDF_DETAILS))
3897 fprintf (dump_file, " allowed multipliers:");
3898 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3899 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3900 fprintf (dump_file, " %d", (int) i);
3901 fprintf (dump_file, "\n");
3902 fprintf (dump_file, "\n");
3905 valid_mult_list[data_index] = valid_mult;
3908 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3909 return false;
3911 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
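The caching scheme above in miniature: one bit per ratio in [-MAX_RATIO, MAX_RATIO], biased by MAX_RATIO so the index is non-negative, filled once by asking the target about every ratio. A sketch with a hypothetical target_accepts_scale predicate in place of the RTL validity query:

#include <assert.h>
#include <stdbool.h>

#define MY_MAX_RATIO 128

/* Hypothetical target predicate, e.g. x86-like address scales.  */
static bool
target_accepts_scale (long ratio)
{
  return ratio == 1 || ratio == 2 || ratio == 4 || ratio == 8;
}

static bool cache[2 * MY_MAX_RATIO + 1];
static bool cache_filled;

static bool
scale_allowed (long ratio)
{
  if (!cache_filled)
    {
      for (long i = -MY_MAX_RATIO; i <= MY_MAX_RATIO; i++)
        cache[i + MY_MAX_RATIO] = target_accepts_scale (i);
      cache_filled = true;
    }
  if (ratio < -MY_MAX_RATIO || ratio > MY_MAX_RATIO)
    return false;
  return cache[ratio + MY_MAX_RATIO];
}

int
main (void)
{
  assert (scale_allowed (4));
  assert (!scale_allowed (3));
  assert (!scale_allowed (1000));   /* outside the cached range */
  return 0;
}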
3914 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3915 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3916 variable is omitted. Compute the cost for a memory reference that accesses
3917 a memory location of mode MEM_MODE in address space AS.
3919 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3920 size of MEM_MODE / RATIO) is available. To make this determination, we
3921 look at the size of the increment to be made, which is given in CSTEP.
3922 CSTEP may be zero if the step is unknown.
3923 STMT_AFTER_INC is true iff the statement we're looking at is after the
3924 increment of the original biv.
3926 TODO -- there must be some better way. This all is quite crude. */
3928 enum ainc_type
3930 AINC_PRE_INC, /* Pre increment. */
3931 AINC_PRE_DEC, /* Pre decrement. */
3932 AINC_POST_INC, /* Post increment. */
3933 AINC_POST_DEC, /* Post decrement. */
3934 AINC_NONE /* Also the number of auto increment types. */
3937 struct address_cost_data
3939 HOST_WIDE_INT min_offset, max_offset;
3940 unsigned costs[2][2][2][2];
3941 unsigned ainc_costs[AINC_NONE];
3945 static comp_cost
3946 get_address_cost (bool symbol_present, bool var_present,
3947 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3948 HOST_WIDE_INT cstep, machine_mode mem_mode,
3949 addr_space_t as, bool speed,
3950 bool stmt_after_inc, bool *may_autoinc)
3952 machine_mode address_mode = targetm.addr_space.address_mode (as);
3953 static vec<address_cost_data *> address_cost_data_list;
3954 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3955 address_cost_data *data;
3956 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3957 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3958 unsigned cost, acost, complexity;
3959 enum ainc_type autoinc_type;
3960 bool offset_p, ratio_p, autoinc;
3961 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3962 unsigned HOST_WIDE_INT mask;
3963 unsigned bits;
3965 if (data_index >= address_cost_data_list.length ())
3966 address_cost_data_list.safe_grow_cleared (data_index + 1);
3968 data = address_cost_data_list[data_index];
3969 if (!data)
3971 HOST_WIDE_INT i;
3972 HOST_WIDE_INT rat, off = 0;
3973 int old_cse_not_expected, width;
3974 unsigned sym_p, var_p, off_p, rat_p, add_c;
3975 rtx_insn *seq;
3976 rtx addr, base;
3977 rtx reg0, reg1;
3979 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3981 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3983 width = GET_MODE_BITSIZE (address_mode) - 1;
3984 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3985 width = HOST_BITS_PER_WIDE_INT - 1;
3986 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3988 for (i = width; i >= 0; i--)
3990 off = -((unsigned HOST_WIDE_INT) 1 << i);
3991 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3992 if (memory_address_addr_space_p (mem_mode, addr, as))
3993 break;
3995 data->min_offset = (i == -1? 0 : off);
3997 for (i = width; i >= 0; i--)
3999 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4000 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4001 if (memory_address_addr_space_p (mem_mode, addr, as))
4002 break;
4003 /* For some strict-alignment targets, the offset must be naturally
4004 aligned. Try an aligned offset if mem_mode is not QImode. */
4005 off = mem_mode != QImode
4006 ? ((unsigned HOST_WIDE_INT) 1 << i)
4007 - GET_MODE_SIZE (mem_mode)
4008 : 0;
4009 if (off > 0)
4011 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4012 if (memory_address_addr_space_p (mem_mode, addr, as))
4013 break;
4016 if (i == -1)
4017 off = 0;
4018 data->max_offset = off;
4020 if (dump_file && (dump_flags & TDF_DETAILS))
4022 fprintf (dump_file, "get_address_cost:\n");
4023 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4024 GET_MODE_NAME (mem_mode),
4025 data->min_offset);
4026 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4027 GET_MODE_NAME (mem_mode),
4028 data->max_offset);
4031 rat = 1;
4032 for (i = 2; i <= MAX_RATIO; i++)
4033 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4035 rat = i;
4036 break;
4039 /* Compute the cost of various addressing modes. */
4040 acost = 0;
4041 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4042 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4044 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4045 || USE_STORE_PRE_DECREMENT (mem_mode))
4047 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4048 has_predec[mem_mode]
4049 = memory_address_addr_space_p (mem_mode, addr, as);
4051 if (has_predec[mem_mode])
4052 data->ainc_costs[AINC_PRE_DEC]
4053 = address_cost (addr, mem_mode, as, speed);
4055 if (USE_LOAD_POST_DECREMENT (mem_mode)
4056 || USE_STORE_POST_DECREMENT (mem_mode))
4058 addr = gen_rtx_POST_DEC (address_mode, reg0);
4059 has_postdec[mem_mode]
4060 = memory_address_addr_space_p (mem_mode, addr, as);
4062 if (has_postdec[mem_mode])
4063 data->ainc_costs[AINC_POST_DEC]
4064 = address_cost (addr, mem_mode, as, speed);
4066 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4067 || USE_STORE_PRE_INCREMENT (mem_mode))
4069 addr = gen_rtx_PRE_INC (address_mode, reg0);
4070 has_preinc[mem_mode]
4071 = memory_address_addr_space_p (mem_mode, addr, as);
4073 if (has_preinc[mem_mode])
4074 data->ainc_costs[AINC_PRE_INC]
4075 = address_cost (addr, mem_mode, as, speed);
4077 if (USE_LOAD_POST_INCREMENT (mem_mode)
4078 || USE_STORE_POST_INCREMENT (mem_mode))
4080 addr = gen_rtx_POST_INC (address_mode, reg0);
4081 has_postinc[mem_mode]
4082 = memory_address_addr_space_p (mem_mode, addr, as);
4084 if (has_postinc[mem_mode])
4085 data->ainc_costs[AINC_POST_INC]
4086 = address_cost (addr, mem_mode, as, speed);
4088 for (i = 0; i < 16; i++)
4090 sym_p = i & 1;
4091 var_p = (i >> 1) & 1;
4092 off_p = (i >> 2) & 1;
4093 rat_p = (i >> 3) & 1;
4095 addr = reg0;
4096 if (rat_p)
4097 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4098 gen_int_mode (rat, address_mode));
4100 if (var_p)
4101 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4103 if (sym_p)
4105 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4106 /* ??? We can run into trouble with some backends by presenting
4107 them with symbols which haven't been properly passed through
4108 targetm.encode_section_info. By setting the local bit, we
4109 enhance the probability of things working. */
4110 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4112 if (off_p)
4113 base = gen_rtx_fmt_e (CONST, address_mode,
4114 gen_rtx_fmt_ee
4115 (PLUS, address_mode, base,
4116 gen_int_mode (off, address_mode)));
4118 else if (off_p)
4119 base = gen_int_mode (off, address_mode);
4120 else
4121 base = NULL_RTX;
4123 if (base)
4124 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4126 start_sequence ();
4127 /* To avoid splitting addressing modes, pretend that no cse will
4128 follow. */
4129 old_cse_not_expected = cse_not_expected;
4130 cse_not_expected = true;
4131 addr = memory_address_addr_space (mem_mode, addr, as);
4132 cse_not_expected = old_cse_not_expected;
4133 seq = get_insns ();
4134 end_sequence ();
4136 acost = seq_cost (seq, speed);
4137 acost += address_cost (addr, mem_mode, as, speed);
4139 if (!acost)
4140 acost = 1;
4141 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4144 /* On some targets, it is quite expensive to load symbol to a register,
4145 which makes addresses that contain symbols look much more expensive.
4146 However, the symbol will have to be loaded in any case before the
4147 loop (and quite likely we have it in a register already), so it does not
4148 make much sense to penalize them too heavily. So make some final
4149 tweaks for the SYMBOL_PRESENT modes:
4151 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4152 var is cheaper, use this mode with small penalty.
4153 If VAR_PRESENT is true, try whether the mode with
4154 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4155 if this is the case, use it. */
4156 add_c = add_cost (speed, address_mode);
4157 for (i = 0; i < 8; i++)
4159 var_p = i & 1;
4160 off_p = (i >> 1) & 1;
4161 rat_p = (i >> 2) & 1;
4163 acost = data->costs[0][1][off_p][rat_p] + 1;
4164 if (var_p)
4165 acost += add_c;
4167 if (acost < data->costs[1][var_p][off_p][rat_p])
4168 data->costs[1][var_p][off_p][rat_p] = acost;
4171 if (dump_file && (dump_flags & TDF_DETAILS))
4173 fprintf (dump_file, "Address costs:\n");
4175 for (i = 0; i < 16; i++)
4177 sym_p = i & 1;
4178 var_p = (i >> 1) & 1;
4179 off_p = (i >> 2) & 1;
4180 rat_p = (i >> 3) & 1;
4182 fprintf (dump_file, " ");
4183 if (sym_p)
4184 fprintf (dump_file, "sym + ");
4185 if (var_p)
4186 fprintf (dump_file, "var + ");
4187 if (off_p)
4188 fprintf (dump_file, "cst + ");
4189 if (rat_p)
4190 fprintf (dump_file, "rat * ");
4192 acost = data->costs[sym_p][var_p][off_p][rat_p];
4193 fprintf (dump_file, "index costs %d\n", acost);
4195 if (has_predec[mem_mode] || has_postdec[mem_mode]
4196 || has_preinc[mem_mode] || has_postinc[mem_mode])
4197 fprintf (dump_file, " May include autoinc/dec\n");
4198 fprintf (dump_file, "\n");
4201 address_cost_data_list[data_index] = data;
4204 bits = GET_MODE_BITSIZE (address_mode);
4205 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4206 offset &= mask;
4207 if ((offset >> (bits - 1) & 1))
4208 offset |= ~mask;
4209 s_offset = offset;
4211 autoinc = false;
4212 autoinc_type = AINC_NONE;
4213 msize = GET_MODE_SIZE (mem_mode);
4214 autoinc_offset = offset;
4215 if (stmt_after_inc)
4216 autoinc_offset += ratio * cstep;
4217 if (symbol_present || var_present || ratio != 1)
4218 autoinc = false;
4219 else
4221 if (has_postinc[mem_mode] && autoinc_offset == 0
4222 && msize == cstep)
4223 autoinc_type = AINC_POST_INC;
4224 else if (has_postdec[mem_mode] && autoinc_offset == 0
4225 && msize == -cstep)
4226 autoinc_type = AINC_POST_DEC;
4227 else if (has_preinc[mem_mode] && autoinc_offset == msize
4228 && msize == cstep)
4229 autoinc_type = AINC_PRE_INC;
4230 else if (has_predec[mem_mode] && autoinc_offset == -msize
4231 && msize == -cstep)
4232 autoinc_type = AINC_PRE_DEC;
4234 if (autoinc_type != AINC_NONE)
4235 autoinc = true;
4238 cost = 0;
4239 offset_p = (s_offset != 0
4240 && data->min_offset <= s_offset
4241 && s_offset <= data->max_offset);
4242 ratio_p = (ratio != 1
4243 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4245 if (ratio != 1 && !ratio_p)
4246 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4248 if (s_offset && !offset_p && !symbol_present)
4249 cost += add_cost (speed, address_mode);
4251 if (may_autoinc)
4252 *may_autoinc = autoinc;
4253 if (autoinc)
4254 acost = data->ainc_costs[autoinc_type];
4255 else
4256 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4257 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4258 return new_cost (cost + acost, complexity);
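
/* Illustration of the table above (not from the pass itself; exact numbers
   are target-dependent): for a use a[i], where "a" is a global array of
   4-byte elements, the address decomposes as sym + 4 * var, so the
   precomputed entry data->costs[1][0][0][1] (sym_p, !var_p, !off_p, rat_p)
   applies; targets lacking a sym + reg * scale mode will have recorded the
   cost of the insns that materialize the address instead.  */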
4261 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4262 EXPR operand holding the multiplication by a power of two (i.e. the
4263 shift). COST0 and COST1 are the costs for calculating the operands of
4264 EXPR. Returns true if successful, and returns the cost in COST. */
4266 static bool
4267 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4268 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4270 comp_cost res;
4271 tree op1 = TREE_OPERAND (expr, 1);
4272 tree cst = TREE_OPERAND (mult, 1);
4273 tree multop = TREE_OPERAND (mult, 0);
4274 int m = exact_log2 (int_cst_value (cst));
4275 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4276 int as_cost, sa_cost;
4277 bool mult_in_op1;
4279 if (!(m >= 0 && m < maxm))
4280 return false;
4282 STRIP_NOPS (op1);
4283 mult_in_op1 = operand_equal_p (op1, mult, 0);
4285 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4287 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4288 use that in preference to a shift insn followed by an add insn. */
4289 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4290 ? shiftadd_cost (speed, mode, m)
4291 : (mult_in_op1
4292 ? shiftsub1_cost (speed, mode, m)
4293 : shiftsub0_cost (speed, mode, m)));
4295 res = new_cost (MIN (as_cost, sa_cost), 0);
4296 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4298 STRIP_NOPS (multop);
4299 if (!is_gimple_val (multop))
4300 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4302 *cost = res;
4303 return true;
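
/* Worked example, illustrative only: for EXPR = x + y * 8 the constant is
   2^3, so m == 3, and we compare as_cost = add_cost + shift_cost (3)
   against sa_cost = shiftadd_cost (3).  On a target with a fused
   shift-and-add insn sa_cost wins, and the returned cost is that insn
   plus the cost of computing x (COST0).  */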
4306 /* Estimates cost of forcing expression EXPR into a variable. */
4308 static comp_cost
4309 force_expr_to_var_cost (tree expr, bool speed)
4311 static bool costs_initialized = false;
4312 static unsigned integer_cost [2];
4313 static unsigned symbol_cost [2];
4314 static unsigned address_cost [2];
4315 tree op0, op1;
4316 comp_cost cost0, cost1, cost;
4317 machine_mode mode;
4319 if (!costs_initialized)
4321 tree type = build_pointer_type (integer_type_node);
4322 tree var, addr;
4323 rtx x;
4324 int i;
4326 var = create_tmp_var_raw (integer_type_node, "test_var");
4327 TREE_STATIC (var) = 1;
4328 x = produce_memory_decl_rtl (var, NULL);
4329 SET_DECL_RTL (var, x);
4331 addr = build1 (ADDR_EXPR, type, var);
4334 for (i = 0; i < 2; i++)
4336 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4337 2000), i);
4339 symbol_cost[i] = computation_cost (addr, i) + 1;
4341 address_cost[i]
4342 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4343 if (dump_file && (dump_flags & TDF_DETAILS))
4345 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4346 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4347 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4348 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4349 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4350 fprintf (dump_file, "\n");
4354 costs_initialized = true;
4357 STRIP_NOPS (expr);
4359 if (SSA_VAR_P (expr))
4360 return no_cost;
4362 if (is_gimple_min_invariant (expr))
4364 if (TREE_CODE (expr) == INTEGER_CST)
4365 return new_cost (integer_cost [speed], 0);
4367 if (TREE_CODE (expr) == ADDR_EXPR)
4369 tree obj = TREE_OPERAND (expr, 0);
4371 if (TREE_CODE (obj) == VAR_DECL
4372 || TREE_CODE (obj) == PARM_DECL
4373 || TREE_CODE (obj) == RESULT_DECL)
4374 return new_cost (symbol_cost [speed], 0);
4377 return new_cost (address_cost [speed], 0);
4380 switch (TREE_CODE (expr))
4382 case POINTER_PLUS_EXPR:
4383 case PLUS_EXPR:
4384 case MINUS_EXPR:
4385 case MULT_EXPR:
4386 op0 = TREE_OPERAND (expr, 0);
4387 op1 = TREE_OPERAND (expr, 1);
4388 STRIP_NOPS (op0);
4389 STRIP_NOPS (op1);
4390 break;
4392 CASE_CONVERT:
4393 case NEGATE_EXPR:
4394 op0 = TREE_OPERAND (expr, 0);
4395 STRIP_NOPS (op0);
4396 op1 = NULL_TREE;
4397 break;
4399 default:
4400 /* Just an arbitrary value, FIXME. */
4401 return new_cost (target_spill_cost[speed], 0);
4404 if (op0 == NULL_TREE
4405 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4406 cost0 = no_cost;
4407 else
4408 cost0 = force_expr_to_var_cost (op0, speed);
4410 if (op1 == NULL_TREE
4411 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4412 cost1 = no_cost;
4413 else
4414 cost1 = force_expr_to_var_cost (op1, speed);
4416 mode = TYPE_MODE (TREE_TYPE (expr));
4417 switch (TREE_CODE (expr))
4419 case POINTER_PLUS_EXPR:
4420 case PLUS_EXPR:
4421 case MINUS_EXPR:
4422 case NEGATE_EXPR:
4423 cost = new_cost (add_cost (speed, mode), 0);
4424 if (TREE_CODE (expr) != NEGATE_EXPR)
4426 tree mult = NULL_TREE;
4427 comp_cost sa_cost;
4428 if (TREE_CODE (op1) == MULT_EXPR)
4429 mult = op1;
4430 else if (TREE_CODE (op0) == MULT_EXPR)
4431 mult = op0;
4433 if (mult != NULL_TREE
4434 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4435 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4436 speed, &sa_cost))
4437 return sa_cost;
4439 break;
4441 CASE_CONVERT:
4443 tree inner_mode, outer_mode;
4444 outer_mode = TREE_TYPE (expr);
4445 inner_mode = TREE_TYPE (op0);
4446 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4447 TYPE_MODE (inner_mode), speed), 0);
4449 break;
4451 case MULT_EXPR:
4452 if (cst_and_fits_in_hwi (op0))
4453 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4454 mode, speed), 0);
4455 else if (cst_and_fits_in_hwi (op1))
4456 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4457 mode, speed), 0);
4458 else
4459 return new_cost (target_spill_cost [speed], 0);
4460 break;
4462 default:
4463 gcc_unreachable ();
4466 cost = add_costs (cost, cost0);
4467 cost = add_costs (cost, cost1);
4469 /* Bound the cost by target_spill_cost. The parts of complicated
4470 computations often are either loop invariant or at least can
4471 be shared between several iv uses, so letting this grow without
4472 limits would not give reasonable results. */
4473 if (cost.cost > (int) target_spill_cost [speed])
4474 cost.cost = target_spill_cost [speed];
4476 return cost;
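
/* Rough illustration of the recursion above (costs are target-dependent):
   EXPR = a * 4 + b hits the PLUS_EXPR case, the inner MULT_EXPR by 2^2 is
   priced via get_shiftadd_cost, and operands that are SSA names or
   constants contribute no_cost; whatever accumulates is finally capped by
   target_spill_cost.  */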
4479 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4480 invariants the computation depends on. */
4482 static comp_cost
4483 force_var_cost (struct ivopts_data *data,
4484 tree expr, bitmap *depends_on)
4486 if (depends_on)
4488 fd_ivopts_data = data;
4489 walk_tree (&expr, find_depends, depends_on, NULL);
4492 return force_expr_to_var_cost (expr, data->speed);
4495 /* Estimates the cost of expressing address ADDR as var + symbol + offset.
4496 The value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT
4497 are set to false if the corresponding part is missing. DEPENDS_ON is a set
4498 of the invariants the computation depends on. */
4500 static comp_cost
4501 split_address_cost (struct ivopts_data *data,
4502 tree addr, bool *symbol_present, bool *var_present,
4503 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4505 tree core;
4506 HOST_WIDE_INT bitsize;
4507 HOST_WIDE_INT bitpos;
4508 tree toffset;
4509 machine_mode mode;
4510 int unsignedp, reversep, volatilep;
4512 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4513 &unsignedp, &reversep, &volatilep, false);
4515 if (toffset != 0
4516 || bitpos % BITS_PER_UNIT != 0
4517 || reversep
4518 || TREE_CODE (core) != VAR_DECL)
4520 *symbol_present = false;
4521 *var_present = true;
4522 fd_ivopts_data = data;
4523 if (depends_on)
4524 walk_tree (&addr, find_depends, depends_on, NULL);
4526 return new_cost (target_spill_cost[data->speed], 0);
4529 *offset += bitpos / BITS_PER_UNIT;
4530 if (TREE_STATIC (core)
4531 || DECL_EXTERNAL (core))
4533 *symbol_present = true;
4534 *var_present = false;
4535 return no_cost;
4538 *symbol_present = false;
4539 *var_present = true;
4540 return no_cost;
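
/* Example of the intended outcome, for illustration: for ADDR referencing
   element 3 of a file-scope "static int arr[10]", CORE is arr
   (TREE_STATIC), BITPOS is 3 * 32, so *OFFSET grows by 12 and
   *SYMBOL_PRESENT becomes true at no extra cost.  */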
4543 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
4544 var + symbol + offset. The value of the offset is added to OFFSET;
4545 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4546 part is missing. DEPENDS_ON is a set of the invariants the computation
4547 depends on. */
4549 static comp_cost
4550 ptr_difference_cost (struct ivopts_data *data,
4551 tree e1, tree e2, bool *symbol_present, bool *var_present,
4552 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4554 HOST_WIDE_INT diff = 0;
4555 aff_tree aff_e1, aff_e2;
4556 tree type;
4558 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4560 if (ptr_difference_const (e1, e2, &diff))
4562 *offset += diff;
4563 *symbol_present = false;
4564 *var_present = false;
4565 return no_cost;
4568 if (integer_zerop (e2))
4569 return split_address_cost (data, TREE_OPERAND (e1, 0),
4570 symbol_present, var_present, offset, depends_on);
4572 *symbol_present = false;
4573 *var_present = true;
4575 type = signed_type_for (TREE_TYPE (e1));
4576 tree_to_aff_combination (e1, type, &aff_e1);
4577 tree_to_aff_combination (e2, type, &aff_e2);
4578 aff_combination_scale (&aff_e2, -1);
4579 aff_combination_add (&aff_e1, &aff_e2);
4581 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
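
/* Example, for illustration: for E1 = &a[16] and E2 = &a[4] with 4-byte
   elements, ptr_difference_const succeeds with DIFF = 48, so *OFFSET grows
   by 48 and the difference itself is free.  */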
4584 /* Estimates the cost of expressing the difference E1 - E2 as
4585 var + symbol + offset. The value of the offset is added to OFFSET;
4586 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4587 part is missing. DEPENDS_ON is a set of the invariants the computation
4588 depends on. */
4590 static comp_cost
4591 difference_cost (struct ivopts_data *data,
4592 tree e1, tree e2, bool *symbol_present, bool *var_present,
4593 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4595 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4596 unsigned HOST_WIDE_INT off1, off2;
4597 aff_tree aff_e1, aff_e2;
4598 tree type;
4600 e1 = strip_offset (e1, &off1);
4601 e2 = strip_offset (e2, &off2);
4602 *offset += off1 - off2;
4604 STRIP_NOPS (e1);
4605 STRIP_NOPS (e2);
4607 if (TREE_CODE (e1) == ADDR_EXPR)
4608 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4609 offset, depends_on);
4610 *symbol_present = false;
4612 if (operand_equal_p (e1, e2, 0))
4614 *var_present = false;
4615 return no_cost;
4618 *var_present = true;
4620 if (integer_zerop (e2))
4621 return force_var_cost (data, e1, depends_on);
4623 if (integer_zerop (e1))
4625 comp_cost cost = force_var_cost (data, e2, depends_on);
4626 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4627 return cost;
4630 type = signed_type_for (TREE_TYPE (e1));
4631 tree_to_aff_combination (e1, type, &aff_e1);
4632 tree_to_aff_combination (e2, type, &aff_e2);
4633 aff_combination_scale (&aff_e2, -1);
4634 aff_combination_add (&aff_e1, &aff_e2);
4636 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4639 /* Returns true if AFF1 and AFF2 are identical. */
4641 static bool
4642 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4644 unsigned i;
4646 if (aff1->n != aff2->n)
4647 return false;
4649 for (i = 0; i < aff1->n; i++)
4651 if (aff1->elts[i].coef != aff2->elts[i].coef)
4652 return false;
4654 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4655 return false;
4657 return true;
4660 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4662 static int
4663 get_expr_id (struct ivopts_data *data, tree expr)
4665 struct iv_inv_expr_ent ent;
4666 struct iv_inv_expr_ent **slot;
4668 ent.expr = expr;
4669 ent.hash = iterative_hash_expr (expr, 0);
4670 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4671 if (*slot)
4672 return (*slot)->id;
4674 *slot = XNEW (struct iv_inv_expr_ent);
4675 (*slot)->expr = expr;
4676 (*slot)->hash = ent.hash;
4677 (*slot)->id = data->inv_expr_id++;
4678 return (*slot)->id;
4681 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4682 requires a new compiler-generated temporary. Returns -1 otherwise.
4683 ADDRESS_P is a flag indicating if the expression is for address
4684 computation. */
4686 static int
4687 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4688 tree cbase, HOST_WIDE_INT ratio,
4689 bool address_p)
4691 aff_tree ubase_aff, cbase_aff;
4692 tree expr, ub, cb;
4694 STRIP_NOPS (ubase);
4695 STRIP_NOPS (cbase);
4696 ub = ubase;
4697 cb = cbase;
4699 if ((TREE_CODE (ubase) == INTEGER_CST)
4700 && (TREE_CODE (cbase) == INTEGER_CST))
4701 return -1;
4703 /* Strips the constant part. */
4704 if (TREE_CODE (ubase) == PLUS_EXPR
4705 || TREE_CODE (ubase) == MINUS_EXPR
4706 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4708 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4709 ubase = TREE_OPERAND (ubase, 0);
4712 /* Strips the constant part. */
4713 if (TREE_CODE (cbase) == PLUS_EXPR
4714 || TREE_CODE (cbase) == MINUS_EXPR
4715 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4717 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4718 cbase = TREE_OPERAND (cbase, 0);
4721 if (address_p)
4723 if (((TREE_CODE (ubase) == SSA_NAME)
4724 || (TREE_CODE (ubase) == ADDR_EXPR
4725 && is_gimple_min_invariant (ubase)))
4726 && (TREE_CODE (cbase) == INTEGER_CST))
4727 return -1;
4729 if (((TREE_CODE (cbase) == SSA_NAME)
4730 || (TREE_CODE (cbase) == ADDR_EXPR
4731 && is_gimple_min_invariant (cbase)))
4732 && (TREE_CODE (ubase) == INTEGER_CST))
4733 return -1;
4736 if (ratio == 1)
4738 if (operand_equal_p (ubase, cbase, 0))
4739 return -1;
4741 if (TREE_CODE (ubase) == ADDR_EXPR
4742 && TREE_CODE (cbase) == ADDR_EXPR)
4744 tree usym, csym;
4746 usym = TREE_OPERAND (ubase, 0);
4747 csym = TREE_OPERAND (cbase, 0);
4748 if (TREE_CODE (usym) == ARRAY_REF)
4750 tree ind = TREE_OPERAND (usym, 1);
4751 if (TREE_CODE (ind) == INTEGER_CST
4752 && tree_fits_shwi_p (ind)
4753 && tree_to_shwi (ind) == 0)
4754 usym = TREE_OPERAND (usym, 0);
4756 if (TREE_CODE (csym) == ARRAY_REF)
4758 tree ind = TREE_OPERAND (csym, 1);
4759 if (TREE_CODE (ind) == INTEGER_CST
4760 && tree_fits_shwi_p (ind)
4761 && tree_to_shwi (ind) == 0)
4762 csym = TREE_OPERAND (csym, 0);
4764 if (operand_equal_p (usym, csym, 0))
4765 return -1;
4767 /* Now do a more complex comparison. */
4768 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4769 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4770 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4771 return -1;
4774 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4775 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4777 aff_combination_scale (&cbase_aff, -1 * ratio);
4778 aff_combination_add (&ubase_aff, &cbase_aff);
4779 expr = aff_combination_to_tree (&ubase_aff);
4780 return get_expr_id (data, expr);
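
/* Example, for illustration: for UBASE = p + 16, CBASE = p and RATIO == 1,
   the constant parts are stripped, operand_equal_p succeeds and -1 is
   returned -- the difference folds to a compile-time constant, so no
   compiler-generated temporary is needed.  */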
4785 /* Determines the cost of the computation by which USE is expressed
4786 from induction variable CAND. If ADDRESS_P is true, we just need
4787 to create an address from it, otherwise we want to get it into a
4788 register. A set of invariants we depend on is stored in
4789 DEPENDS_ON. AT is the statement at which the value is computed.
4790 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4791 addressing is likely. */
4793 static comp_cost
4794 get_computation_cost_at (struct ivopts_data *data,
4795 struct iv_use *use, struct iv_cand *cand,
4796 bool address_p, bitmap *depends_on, gimple *at,
4797 bool *can_autoinc,
4798 int *inv_expr_id)
4800 tree ubase = use->iv->base, ustep = use->iv->step;
4801 tree cbase, cstep;
4802 tree utype = TREE_TYPE (ubase), ctype;
4803 unsigned HOST_WIDE_INT cstepi, offset = 0;
4804 HOST_WIDE_INT ratio, aratio;
4805 bool var_present, symbol_present, stmt_is_after_inc;
4806 comp_cost cost;
4807 widest_int rat;
4808 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4809 machine_mode mem_mode = (address_p
4810 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4811 : VOIDmode);
4813 if (depends_on)
4814 *depends_on = NULL;
4816 /* Only consider real candidates. */
4817 if (!cand->iv)
4818 return infinite_cost;
4820 cbase = cand->iv->base;
4821 cstep = cand->iv->step;
4822 ctype = TREE_TYPE (cbase);
4824 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4826 /* We do not have enough precision to express the values of USE. */
4827 return infinite_cost;
4830 if (address_p
4831 || (use->iv->base_object
4832 && cand->iv->base_object
4833 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4834 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4836 /* Do not try to express the address of an object with a computation
4837 based on the address of a different object. This may cause problems
4838 in rtl level alias analysis (which does not expect this to happen,
4839 as this is illegal in C), and would be unlikely to be useful
4840 anyway. */
4841 if (use->iv->base_object
4842 && cand->iv->base_object
4843 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4844 return infinite_cost;
4847 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4849 /* TODO -- add direct handling of this case. */
4850 goto fallback;
4853 /* CSTEPI is removed from the offset in case the statement is after the
4854 increment. If the step is not constant, we use zero instead.
4855 This is a bit imprecise (there is the extra addition), but
4856 redundancy elimination is likely to transform the code so that
4857 it uses the value of the variable before the increment anyway,
4858 so it is not that unrealistic. */
4859 if (cst_and_fits_in_hwi (cstep))
4860 cstepi = int_cst_value (cstep);
4861 else
4862 cstepi = 0;
4864 if (!constant_multiple_of (ustep, cstep, &rat))
4865 return infinite_cost;
4867 if (wi::fits_shwi_p (rat))
4868 ratio = rat.to_shwi ();
4869 else
4870 return infinite_cost;
4872 STRIP_NOPS (cbase);
4873 ctype = TREE_TYPE (cbase);
4875 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4877 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4878 or ratio == 1, it is better to handle this like
4880 ubase - ratio * cbase + ratio * var
4882 (this also holds in the case ratio == -1, TODO). */
4884 if (cst_and_fits_in_hwi (cbase))
4886 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4887 cost = difference_cost (data,
4888 ubase, build_int_cst (utype, 0),
4889 &symbol_present, &var_present, &offset,
4890 depends_on);
4891 cost.cost /= avg_loop_niter (data->current_loop);
4893 else if (ratio == 1)
4895 tree real_cbase = cbase;
4897 /* Check to see if any adjustment is needed. */
4898 if (cstepi == 0 && stmt_is_after_inc)
4900 aff_tree real_cbase_aff;
4901 aff_tree cstep_aff;
4903 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4904 &real_cbase_aff);
4905 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4907 aff_combination_add (&real_cbase_aff, &cstep_aff);
4908 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4911 cost = difference_cost (data,
4912 ubase, real_cbase,
4913 &symbol_present, &var_present, &offset,
4914 depends_on);
4915 cost.cost /= avg_loop_niter (data->current_loop);
4917 else if (address_p
4918 && !POINTER_TYPE_P (ctype)
4919 && multiplier_allowed_in_address_p
4920 (ratio, mem_mode,
4921 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4923 if (cstepi == 0 && stmt_is_after_inc)
4925 if (POINTER_TYPE_P (ctype))
4926 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4927 else
4928 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4930 cbase
4931 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4932 cost = difference_cost (data,
4933 ubase, cbase,
4934 &symbol_present, &var_present, &offset,
4935 depends_on);
4936 cost.cost /= avg_loop_niter (data->current_loop);
4938 else
4940 cost = force_var_cost (data, cbase, depends_on);
4941 cost = add_costs (cost,
4942 difference_cost (data,
4943 ubase, build_int_cst (utype, 0),
4944 &symbol_present, &var_present,
4945 &offset, depends_on));
4946 cost.cost /= avg_loop_niter (data->current_loop);
4947 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4950 /* The set of invariants depended on by the sub-use has already been
4951 computed for the first use in the group. */
4952 if (use->sub_id)
4954 cost.cost = 0;
4955 if (depends_on && *depends_on)
4956 bitmap_clear (*depends_on);
4958 else if (inv_expr_id)
4960 *inv_expr_id =
4961 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4962 /* Clear depends on. */
4963 if (*inv_expr_id != -1 && depends_on && *depends_on)
4964 bitmap_clear (*depends_on);
4967 /* If we are after the increment, the value of the candidate is higher by
4968 one iteration. */
4969 if (stmt_is_after_inc)
4970 offset -= ratio * cstepi;
4972 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4973 (the symbol/var1/const parts may be omitted). If we are looking for an
4974 address, find the cost of addressing this. */
4975 if (address_p)
4976 return add_costs (cost,
4977 get_address_cost (symbol_present, var_present,
4978 offset, ratio, cstepi,
4979 mem_mode,
4980 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4981 speed, stmt_is_after_inc,
4982 can_autoinc));
4984 /* Otherwise estimate the costs for computing the expression. */
4985 if (!symbol_present && !var_present && !offset)
4987 if (ratio != 1)
4988 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4989 return cost;
4992 /* Symbol + offset should be compile-time computable, so consider that they
4993 are added once to the variable, if present. */
4994 if (var_present && (symbol_present || offset))
4995 cost.cost += adjust_setup_cost (data,
4996 add_cost (speed, TYPE_MODE (ctype)));
4998 /* Having an offset does not affect the runtime cost if it is added to the
4999 symbol, but it increases complexity. */
5000 if (offset)
5001 cost.complexity++;
5003 cost.cost += add_cost (speed, TYPE_MODE (ctype));
5005 aratio = ratio > 0 ? ratio : -ratio;
5006 if (aratio != 1)
5007 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5008 return cost;
5010 fallback:
5011 if (can_autoinc)
5012 *can_autoinc = false;
5015 /* Just get the expression, expand it and measure the cost. */
5016 tree comp = get_computation_at (data->current_loop, use, cand, at);
5018 if (!comp)
5019 return infinite_cost;
5021 if (address_p)
5022 comp = build_simple_mem_ref (comp);
5024 return new_cost (computation_cost (comp, speed), 0);
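
/* Illustrative instance of the above: for a use with iv {base = u_0,
   step = 4} and a candidate with iv {base = c_0, step = 4}, RATIO == 1 and
   the value is expressed as (u_0 - c_0) + var.  The cost of u_0 - c_0 is
   divided by avg_loop_niter since it is computed once before the loop, and
   for an address use the precomputed address cost is added on top.  */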
5028 /* Determines the cost of the computation by which USE is expressed
5029 from induction variable CAND. If ADDRESS_P is true, we just need
5030 to create an address from it, otherwise we want to get it into a
5031 register. A set of invariants we depend on is stored in
5032 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5033 autoinc addressing is likely. */
5035 static comp_cost
5036 get_computation_cost (struct ivopts_data *data,
5037 struct iv_use *use, struct iv_cand *cand,
5038 bool address_p, bitmap *depends_on,
5039 bool *can_autoinc, int *inv_expr_id)
5041 return get_computation_cost_at (data,
5042 use, cand, address_p, depends_on, use->stmt,
5043 can_autoinc, inv_expr_id);
5046 /* Determines cost of basing replacement of USE on CAND in a generic
5047 expression. */
5049 static bool
5050 determine_use_iv_cost_generic (struct ivopts_data *data,
5051 struct iv_use *use, struct iv_cand *cand)
5053 bitmap depends_on;
5054 comp_cost cost;
5055 int inv_expr_id = -1;
5057 /* The simple case first -- if we need to express value of the preserved
5058 original biv, the cost is 0. This also prevents us from counting the
5059 cost of increment twice -- once at this use and once in the cost of
5060 the candidate. */
5061 if (cand->pos == IP_ORIGINAL
5062 && cand->incremented_at == use->stmt)
5064 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5065 ERROR_MARK, -1);
5066 return true;
5069 cost = get_computation_cost (data, use, cand, false, &depends_on,
5070 NULL, &inv_expr_id);
5072 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5073 inv_expr_id);
5075 return !infinite_cost_p (cost);
5078 /* Determines cost of basing replacement of USE on CAND in an address. */
5080 static bool
5081 determine_use_iv_cost_address (struct ivopts_data *data,
5082 struct iv_use *use, struct iv_cand *cand)
5084 bitmap depends_on;
5085 bool can_autoinc;
5086 int inv_expr_id = -1;
5087 struct iv_use *sub_use;
5088 comp_cost sub_cost;
5089 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5090 &can_autoinc, &inv_expr_id);
5092 if (cand->ainc_use == use)
5094 if (can_autoinc)
5095 cost.cost -= cand->cost_step;
5096 /* If we generated the candidate solely for exploiting autoincrement
5097 opportunities, and it turns out it can't be used, set the cost to
5098 infinity to make sure we ignore it. */
5099 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5100 cost = infinite_cost;
5102 for (sub_use = use->next;
5103 sub_use && !infinite_cost_p (cost);
5104 sub_use = sub_use->next)
5106 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5107 &can_autoinc, NULL);
5108 cost = add_costs (cost, sub_cost);
5111 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5112 inv_expr_id);
5114 return !infinite_cost_p (cost);
5117 /* Computes value of candidate CAND at position AT in iteration NITER, and
5118 stores it to VAL. */
5120 static void
5121 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5122 aff_tree *val)
5124 aff_tree step, delta, nit;
5125 struct iv *iv = cand->iv;
5126 tree type = TREE_TYPE (iv->base);
5127 /* Compute in the unsigned variant of the type, so that the arithmetic
5128 below may safely wrap; for pointer types this also yields an unsigned
5129 integer type of the same precision. */
5130 tree steptype = unsigned_type_for (type);
5132 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5133 aff_combination_convert (&step, steptype);
5134 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5135 aff_combination_convert (&nit, steptype);
5136 aff_combination_mult (&nit, &step, &delta);
5137 if (stmt_after_increment (loop, cand, at))
5138 aff_combination_add (&delta, &step);
5140 tree_to_aff_combination (iv->base, type, val);
5141 if (!POINTER_TYPE_P (type))
5142 aff_combination_convert (val, steptype);
5143 aff_combination_add (val, &delta);
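
/* Illustrative instance: for a candidate with base B and step 4, the value
   after NITER iterations is B + 4 * NITER, or B + 4 * (NITER + 1) if AT
   follows the increment; the arithmetic is done in an unsigned type so
   that it may safely wrap.  */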
5146 /* Returns the period of induction variable IV. */
5148 static tree
5149 iv_period (struct iv *iv)
5151 tree step = iv->step, period, type;
5152 tree pow2div;
5154 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5156 type = unsigned_type_for (TREE_TYPE (step));
5157 /* Period of the iv is lcm (step, type_range)/step - 1,
5158 i.e., N*type_range/step - 1. Since the type range is a power
5159 of two, N == step >> num_of_ending_zeros_binary (step),
5160 so the final result is
5162 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
5165 pow2div = num_ending_zeros (step);
5167 period = build_low_bits_mask (type,
5168 (TYPE_PRECISION (type)
5169 - tree_to_uhwi (pow2div)));
5171 return period;
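
/* Illustrative instance: for a 32-bit iv with step 4, pow2div == 2 and the
   period is build_low_bits_mask (type, 30), i.e. 2^30 - 1 == 0x3fffffff,
   the largest number of iterations for which all the iv values are
   pairwise distinct.  */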
5174 /* Returns the comparison operator used when eliminating the iv USE. */
5176 static enum tree_code
5177 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5179 struct loop *loop = data->current_loop;
5180 basic_block ex_bb;
5181 edge exit;
5183 ex_bb = gimple_bb (use->stmt);
5184 exit = EDGE_SUCC (ex_bb, 0);
5185 if (flow_bb_inside_loop_p (loop, exit->dest))
5186 exit = EDGE_SUCC (ex_bb, 1);
5188 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5191 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5192 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5193 calculation is performed in a non-wrapping type.
5195 TODO: More generally, we could test for the situation that
5196 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5197 This would require knowing the sign of OFFSET. */
5199 static bool
5200 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5202 enum tree_code code;
5203 tree e1, e2;
5204 aff_tree aff_e1, aff_e2, aff_offset;
5206 if (!nowrap_type_p (TREE_TYPE (base)))
5207 return false;
5209 base = expand_simple_operations (base);
5211 if (TREE_CODE (base) == SSA_NAME)
5213 gimple *stmt = SSA_NAME_DEF_STMT (base);
5215 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5216 return false;
5218 code = gimple_assign_rhs_code (stmt);
5219 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5220 return false;
5222 e1 = gimple_assign_rhs1 (stmt);
5223 e2 = gimple_assign_rhs2 (stmt);
5225 else
5227 code = TREE_CODE (base);
5228 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5229 return false;
5230 e1 = TREE_OPERAND (base, 0);
5231 e2 = TREE_OPERAND (base, 1);
5234 /* Use affine expansion as deeper inspection to prove the equality. */
5235 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5236 &aff_e2, &data->name_expansion_cache);
5237 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5238 &aff_offset, &data->name_expansion_cache);
5239 aff_combination_scale (&aff_offset, -1);
5240 switch (code)
5242 case PLUS_EXPR:
5243 aff_combination_add (&aff_e2, &aff_offset);
5244 if (aff_combination_zero_p (&aff_e2))
5245 return true;
5247 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5248 &aff_e1, &data->name_expansion_cache);
5249 aff_combination_add (&aff_e1, &aff_offset);
5250 return aff_combination_zero_p (&aff_e1);
5252 case POINTER_PLUS_EXPR:
5253 aff_combination_add (&aff_e2, &aff_offset);
5254 return aff_combination_zero_p (&aff_e2);
5256 default:
5257 return false;
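
/* Example of the pattern recognized above, for illustration: if BASE is
   the SSA name defined by "base_1 = start_2 + off_3" in a non-wrapping
   type, and OFFSET expands to the same affine combination as off_3, the
   affine difference off_3 - OFFSET is zero, so BASE - OFFSET == start_2
   and cannot overflow.  */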
5261 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5262 comparison with CAND. NITER describes the number of iterations of
5263 the loops. If successful, the comparison in COMP_P is altered accordingly.
5265 We aim to handle the following situation:
5267 sometype *base, *p;
5268 int a, b, i;
5270 i = a;
5271 p = p_0 = base + a;
5273 do
5274 {
5275 bla (*p);
5276 p++;
5277 i++;
5278 }
5279 while (i < b);
5281 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5282 We aim to optimize this to
5284 p = p_0 = base + a;
5285 do
5286 {
5287 bla (*p);
5288 p++;
5289 }
5290 while (p < p_0 - a + b);
5292 This preserves correctness, since the pointer arithmetic does not
5293 overflow. More precisely:
5295 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5296 overflow in computing it or the values of p.
5297 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5298 overflow. To prove this, we use the fact that p_0 = base + a. */
5300 static bool
5301 iv_elimination_compare_lt (struct ivopts_data *data,
5302 struct iv_cand *cand, enum tree_code *comp_p,
5303 struct tree_niter_desc *niter)
5305 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5306 struct aff_tree nit, tmpa, tmpb;
5307 enum tree_code comp;
5308 HOST_WIDE_INT step;
5310 /* We need to know that the candidate induction variable does not overflow.
5311 While more complex analysis may be used to prove this, for now just
5312 check that the variable appears in the original program and that it
5313 is computed in a type that guarantees no overflows. */
5314 cand_type = TREE_TYPE (cand->iv->base);
5315 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5316 return false;
5318 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5319 the calculation of the BOUND could overflow, making the comparison
5320 invalid. */
5321 if (!data->loop_single_exit_p)
5322 return false;
5324 /* We need to be able to decide whether candidate is increasing or decreasing
5325 in order to choose the right comparison operator. */
5326 if (!cst_and_fits_in_hwi (cand->iv->step))
5327 return false;
5328 step = int_cst_value (cand->iv->step);
5330 /* Check that the number of iterations matches the expected pattern:
5331 a + 1 > b ? 0 : b - a - 1. */
5332 mbz = niter->may_be_zero;
5333 if (TREE_CODE (mbz) == GT_EXPR)
5335 /* Handle a + 1 > b. */
5336 tree op0 = TREE_OPERAND (mbz, 0);
5337 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5339 a = TREE_OPERAND (op0, 0);
5340 b = TREE_OPERAND (mbz, 1);
5342 else
5343 return false;
5345 else if (TREE_CODE (mbz) == LT_EXPR)
5347 tree op1 = TREE_OPERAND (mbz, 1);
5349 /* Handle b < a + 1. */
5350 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5352 a = TREE_OPERAND (op1, 0);
5353 b = TREE_OPERAND (mbz, 0);
5355 else
5356 return false;
5358 else
5359 return false;
5361 /* Expected number of iterations is B - A - 1. Check that it matches
5362 the actual number, i.e., that B - A - NITER = 1. */
5363 tree_to_aff_combination (niter->niter, nit_type, &nit);
5364 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5365 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5366 aff_combination_scale (&nit, -1);
5367 aff_combination_scale (&tmpa, -1);
5368 aff_combination_add (&tmpb, &tmpa);
5369 aff_combination_add (&tmpb, &nit);
5370 if (tmpb.n != 0 || tmpb.offset != 1)
5371 return false;
5373 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5374 overflow. */
5375 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5376 cand->iv->step,
5377 fold_convert (TREE_TYPE (cand->iv->step), a));
5378 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5379 return false;
5381 /* Determine the new comparison operator. */
5382 comp = step < 0 ? GT_EXPR : LT_EXPR;
5383 if (*comp_p == NE_EXPR)
5384 *comp_p = comp;
5385 else if (*comp_p == EQ_EXPR)
5386 *comp_p = invert_tree_comparison (comp, false);
5387 else
5388 gcc_unreachable ();
5390 return true;
5393 /* Check whether it is possible to express the condition in USE by comparison
5394 of candidate CAND. If so, store the value compared with to BOUND, and the
5395 comparison operator to COMP. */
5397 static bool
5398 may_eliminate_iv (struct ivopts_data *data,
5399 struct iv_use *use, struct iv_cand *cand, tree *bound,
5400 enum tree_code *comp)
5402 basic_block ex_bb;
5403 edge exit;
5404 tree period;
5405 struct loop *loop = data->current_loop;
5406 aff_tree bnd;
5407 struct tree_niter_desc *desc = NULL;
5409 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5410 return false;
5412 /* For now this works only for exits that dominate the loop latch.
5413 TODO: extend to other conditions inside loop body. */
5414 ex_bb = gimple_bb (use->stmt);
5415 if (use->stmt != last_stmt (ex_bb)
5416 || gimple_code (use->stmt) != GIMPLE_COND
5417 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5418 return false;
5420 exit = EDGE_SUCC (ex_bb, 0);
5421 if (flow_bb_inside_loop_p (loop, exit->dest))
5422 exit = EDGE_SUCC (ex_bb, 1);
5423 if (flow_bb_inside_loop_p (loop, exit->dest))
5424 return false;
5426 desc = niter_for_exit (data, exit);
5427 if (!desc)
5428 return false;
5430 /* Determine whether we can use the variable to test the exit condition.
5431 This is the case iff the period of the induction variable is greater
5432 than the number of iterations for which the exit condition is true. */
5433 period = iv_period (cand->iv);
5435 /* If the number of iterations is constant, compare against it directly. */
5436 if (TREE_CODE (desc->niter) == INTEGER_CST)
5438 /* See cand_value_at. */
5439 if (stmt_after_increment (loop, cand, use->stmt))
5441 if (!tree_int_cst_lt (desc->niter, period))
5442 return false;
5444 else
5446 if (tree_int_cst_lt (period, desc->niter))
5447 return false;
5451 /* If not, and if this is the only possible exit of the loop, see whether
5452 we can get a conservative estimate on the number of iterations of the
5453 entire loop and compare against that instead. */
5454 else
5456 widest_int period_value, max_niter;
5458 max_niter = desc->max;
5459 if (stmt_after_increment (loop, cand, use->stmt))
5460 max_niter += 1;
5461 period_value = wi::to_widest (period);
5462 if (wi::gtu_p (max_niter, period_value))
5464 /* See if we can take advantage of inferred loop bound information. */
5465 if (data->loop_single_exit_p)
5467 if (!max_loop_iterations (loop, &max_niter))
5468 return false;
5469 /* The loop bound is already adjusted by adding 1. */
5470 if (wi::gtu_p (max_niter, period_value))
5471 return false;
5473 else
5474 return false;
5478 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5480 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5481 aff_combination_to_tree (&bnd));
5482 *comp = iv_elimination_compare (data, use);
5484 /* It is unlikely that computing the number of iterations using division
5485 would be more profitable than keeping the original induction variable. */
5486 if (expression_expensive_p (*bound))
5487 return false;
5489 /* Sometimes it is possible to handle the situation that the number of
5490 iterations may be zero unless additional assumptions hold, by using <
5491 instead of != in the exit condition.
5493 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5494 base the exit condition on it. However, that is often too
5495 expensive. */
5496 if (!integer_zerop (desc->may_be_zero))
5497 return iv_elimination_compare_lt (data, cand, comp, desc);
5499 return true;
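
/* Illustrative instance: for an exit with a constant niter of 100 and a
   32-bit candidate with step 1 (period 2^32 - 1 >= 100), the exit test can
   be rewritten with BOUND = cand_value_at (..., 100) and the comparison
   operator taken from the exit edge.  */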
5502 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5503 be copied if it is used in the loop body and DATA->body_includes_call. */
5505 static int
5506 parm_decl_cost (struct ivopts_data *data, tree bound)
5508 tree sbound = bound;
5509 STRIP_NOPS (sbound);
5511 if (TREE_CODE (sbound) == SSA_NAME
5512 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5513 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5514 && data->body_includes_call)
5515 return COSTS_N_INSNS (1);
5517 return 0;
5520 /* Determines cost of basing replacement of USE on CAND in a condition. */
5522 static bool
5523 determine_use_iv_cost_condition (struct ivopts_data *data,
5524 struct iv_use *use, struct iv_cand *cand)
5526 tree bound = NULL_TREE;
5527 struct iv *cmp_iv;
5528 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5529 comp_cost elim_cost, express_cost, cost, bound_cost;
5530 bool ok;
5531 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5532 tree *control_var, *bound_cst;
5533 enum tree_code comp = ERROR_MARK;
5535 /* Only consider real candidates. */
5536 if (!cand->iv)
5538 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5539 ERROR_MARK, -1);
5540 return false;
5543 /* Try iv elimination. */
5544 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5546 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5547 if (elim_cost.cost == 0)
5548 elim_cost.cost = parm_decl_cost (data, bound);
5549 else if (TREE_CODE (bound) == INTEGER_CST)
5550 elim_cost.cost = 0;
5551 /* If we replace a loop condition 'i < n' with 'p < base + n',
5552 depends_on_elim will have 'base' and 'n' set, which implies
5553 that both 'base' and 'n' will be live during the loop. More likely,
5554 'base + n' will be loop invariant, resulting in only one live value
5555 during the loop. So in that case we clear depends_on_elim and set
5556 elim_inv_expr_id instead. */
5557 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5559 elim_inv_expr_id = get_expr_id (data, bound);
5560 bitmap_clear (depends_on_elim);
5562 /* The bound is a loop invariant, so it will be computed
5563 only once. */
5564 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5566 else
5567 elim_cost = infinite_cost;
5569 /* Try expressing the original giv. If it is compared with an invariant,
5570 note that we cannot get rid of it. */
5571 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5572 NULL, &cmp_iv);
5573 gcc_assert (ok);
5575 /* When the condition is a comparison of the candidate IV against
5576 zero, prefer this IV.
5578 TODO: The constant that we're subtracting from the cost should
5579 be target-dependent. This information should be added to the
5580 target costs for each backend. */
5581 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5582 && integer_zerop (*bound_cst)
5583 && (operand_equal_p (*control_var, cand->var_after, 0)
5584 || operand_equal_p (*control_var, cand->var_before, 0)))
5585 elim_cost.cost -= 1;
5587 express_cost = get_computation_cost (data, use, cand, false,
5588 &depends_on_express, NULL,
5589 &express_inv_expr_id);
5590 fd_ivopts_data = data;
5591 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5593 /* Count the cost of the original bound as well. */
5594 bound_cost = force_var_cost (data, *bound_cst, NULL);
5595 if (bound_cost.cost == 0)
5596 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5597 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5598 bound_cost.cost = 0;
5599 express_cost.cost += bound_cost.cost;
5601 /* Choose the better approach, preferring the eliminated IV. */
5602 if (compare_costs (elim_cost, express_cost) <= 0)
5604 cost = elim_cost;
5605 depends_on = depends_on_elim;
5606 depends_on_elim = NULL;
5607 inv_expr_id = elim_inv_expr_id;
5609 else
5611 cost = express_cost;
5612 depends_on = depends_on_express;
5613 depends_on_express = NULL;
5614 bound = NULL_TREE;
5615 comp = ERROR_MARK;
5616 inv_expr_id = express_inv_expr_id;
5619 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5621 if (depends_on_elim)
5622 BITMAP_FREE (depends_on_elim);
5623 if (depends_on_express)
5624 BITMAP_FREE (depends_on_express);
5626 return !infinite_cost_p (cost);
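
/* Illustrative instance of the choice above: for "while (i < n)", where a
   pointer candidate p can eliminate i, ELIM_COST prices the bound
   (set up once before the loop), while EXPRESS_COST prices recomputing i
   from p inside the loop; whichever is cheaper decides whether the exit
   test is rewritten.  */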
5629 /* Determines cost of basing replacement of USE on CAND. Returns false
5630 if USE cannot be based on CAND. */
5632 static bool
5633 determine_use_iv_cost (struct ivopts_data *data,
5634 struct iv_use *use, struct iv_cand *cand)
5636 switch (use->type)
5638 case USE_NONLINEAR_EXPR:
5639 return determine_use_iv_cost_generic (data, use, cand);
5641 case USE_ADDRESS:
5642 return determine_use_iv_cost_address (data, use, cand);
5644 case USE_COMPARE:
5645 return determine_use_iv_cost_condition (data, use, cand);
5647 default:
5648 gcc_unreachable ();
5652 /* Return true if get_computation_cost indicates that autoincrement is
5653 a possibility for the pair of USE and CAND, false otherwise. */
5655 static bool
5656 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5657 struct iv_cand *cand)
5659 bitmap depends_on;
5660 bool can_autoinc;
5661 comp_cost cost;
5663 if (use->type != USE_ADDRESS)
5664 return false;
5666 cost = get_computation_cost (data, use, cand, true, &depends_on,
5667 &can_autoinc, NULL);
5669 BITMAP_FREE (depends_on);
5671 return !infinite_cost_p (cost) && can_autoinc;
5674 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5675 use that allows autoincrement, and set their AINC_USE if possible. */
5677 static void
5678 set_autoinc_for_original_candidates (struct ivopts_data *data)
5680 unsigned i, j;
5682 for (i = 0; i < n_iv_cands (data); i++)
5684 struct iv_cand *cand = iv_cand (data, i);
5685 struct iv_use *closest_before = NULL;
5686 struct iv_use *closest_after = NULL;
5687 if (cand->pos != IP_ORIGINAL)
5688 continue;
5690 for (j = 0; j < n_iv_uses (data); j++)
5692 struct iv_use *use = iv_use (data, j);
5693 unsigned uid = gimple_uid (use->stmt);
5695 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5696 continue;
5698 if (uid < gimple_uid (cand->incremented_at)
5699 && (closest_before == NULL
5700 || uid > gimple_uid (closest_before->stmt)))
5701 closest_before = use;
5703 if (uid > gimple_uid (cand->incremented_at)
5704 && (closest_after == NULL
5705 || uid < gimple_uid (closest_after->stmt)))
5706 closest_after = use;
5709 if (closest_before != NULL
5710 && autoinc_possible_for_pair (data, closest_before, cand))
5711 cand->ainc_use = closest_before;
5712 else if (closest_after != NULL
5713 && autoinc_possible_for_pair (data, closest_after, cand))
5714 cand->ainc_use = closest_after;
5718 /* Finds the candidates for the induction variables. */
5720 static void
5721 find_iv_candidates (struct ivopts_data *data)
5723 /* Add commonly used ivs. */
5724 add_standard_iv_candidates (data);
5726 /* Add old induction variables. */
5727 add_iv_candidate_for_bivs (data);
5729 /* Add induction variables derived from uses. */
5730 add_iv_candidate_for_uses (data);
5732 set_autoinc_for_original_candidates (data);
5734 /* Record the important candidates. */
5735 record_important_candidates (data);
5738 /* Determines costs of basing the use of the iv on an iv candidate. */
5740 static void
5741 determine_use_iv_costs (struct ivopts_data *data)
5743 unsigned i, j;
5744 struct iv_use *use;
5745 struct iv_cand *cand;
5746 bitmap to_clear = BITMAP_ALLOC (NULL);
5748 alloc_use_cost_map (data);
5750 for (i = 0; i < n_iv_uses (data); i++)
5752 use = iv_use (data, i);
5754 if (data->consider_all_candidates)
5756 for (j = 0; j < n_iv_cands (data); j++)
5758 cand = iv_cand (data, j);
5759 determine_use_iv_cost (data, use, cand);
5762 else
5764 bitmap_iterator bi;
5766 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5768 cand = iv_cand (data, j);
5769 if (!determine_use_iv_cost (data, use, cand))
5770 bitmap_set_bit (to_clear, j);
5773 /* Remove the candidates for which the cost is infinite from
5774 the list of related candidates. */
5775 bitmap_and_compl_into (use->related_cands, to_clear);
5776 bitmap_clear (to_clear);
5780 BITMAP_FREE (to_clear);
5782 if (dump_file && (dump_flags & TDF_DETAILS))
5784 fprintf (dump_file, "Use-candidate costs:\n");
5786 for (i = 0; i < n_iv_uses (data); i++)
5788 use = iv_use (data, i);
5790 fprintf (dump_file, "Use %d:\n", i);
5791 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5792 for (j = 0; j < use->n_map_members; j++)
5794 if (!use->cost_map[j].cand
5795 || infinite_cost_p (use->cost_map[j].cost))
5796 continue;
5798 fprintf (dump_file, " %d\t%d\t%d\t",
5799 use->cost_map[j].cand->id,
5800 use->cost_map[j].cost.cost,
5801 use->cost_map[j].cost.complexity);
5802 if (use->cost_map[j].depends_on)
5803 bitmap_print (dump_file,
5804 use->cost_map[j].depends_on, "","");
5805 if (use->cost_map[j].inv_expr_id != -1)
5806 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5807 fprintf (dump_file, "\n");
5810 fprintf (dump_file, "\n");
5812 fprintf (dump_file, "\n");
5816 /* Determines cost of the candidate CAND. */
5818 static void
5819 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5821 comp_cost cost_base;
5822 unsigned cost, cost_step;
5823 tree base;
5825 if (!cand->iv)
5827 cand->cost = 0;
5828 return;
5831 /* There are two costs associated with the candidate -- its increment
5832 and its initialization. The second is almost negligible for any loop
5833 that rolls enough, so we give it only very little weight. */
5835 base = cand->iv->base;
5836 cost_base = force_var_cost (data, base, NULL);
5837 /* It will be exceptional for the iv register to happen to be initialized
5838 with the proper value at no cost. In general, there will be at least a
5839 regcopy or a const set. */
5840 if (cost_base.cost == 0)
5841 cost_base.cost = COSTS_N_INSNS (1);
5842 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5844 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5846 /* Prefer the original ivs unless we may gain something by replacing them;
5847 the reason is to make debugging simpler. This is not relevant for
5848 artificial ivs created by other optimization passes. */
5849 if (cand->pos != IP_ORIGINAL
5850 || !SSA_NAME_VAR (cand->var_before)
5851 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5852 cost++;
5854 /* Prefer not to insert statements into the latch unless there are some
5855 already (so that we do not create unnecessary jumps). */
5856 if (cand->pos == IP_END
5857 && empty_block_p (ip_end_pos (data->current_loop)))
5858 cost++;
5860 cand->cost = cost;
5861 cand->cost_step = cost_step;
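
/* Illustrative instance (costs are target-dependent): with add_cost == 4
   and a forced base costing COSTS_N_INSNS (1), the candidate cost is
   4 + adjust_setup_cost (data, 4), plus 1 unless the candidate preserves
   an original user-visible iv, plus 1 more if it would add code to an
   empty latch.  */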
5864 /* Determines costs of computation of the candidates. */
5866 static void
5867 determine_iv_costs (struct ivopts_data *data)
5869 unsigned i;
5871 if (dump_file && (dump_flags & TDF_DETAILS))
5873 fprintf (dump_file, "Candidate costs:\n");
5874 fprintf (dump_file, " cand\tcost\n");
5877 for (i = 0; i < n_iv_cands (data); i++)
5879 struct iv_cand *cand = iv_cand (data, i);
5881 determine_iv_cost (data, cand);
5883 if (dump_file && (dump_flags & TDF_DETAILS))
5884 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5887 if (dump_file && (dump_flags & TDF_DETAILS))
5888 fprintf (dump_file, "\n");
5891 /* Calculates cost for having SIZE induction variables. */
5893 static unsigned
5894 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5896 /* We add size to the cost, so that we prefer eliminating ivs
5897 if possible. */
5898 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5899 data->body_includes_call);
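
/* Illustrative instance: requesting 6 ivs with data->regs_used == 2 on a
   target with ample registers costs roughly 6 plus a small pressure term,
   while set sizes that exceed the available registers are penalized by the
   estimated spill cost.  */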
5902 /* For each size of the induction variable set determine the penalty. */
5904 static void
5905 determine_set_costs (struct ivopts_data *data)
5907 unsigned j, n;
5908 gphi *phi;
5909 gphi_iterator psi;
5910 tree op;
5911 struct loop *loop = data->current_loop;
5912 bitmap_iterator bi;
5914 if (dump_file && (dump_flags & TDF_DETAILS))
5916 fprintf (dump_file, "Global costs:\n");
5917 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5918 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5919 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5920 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5923 n = 0;
5924 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5926 phi = psi.phi ();
5927 op = PHI_RESULT (phi);
5929 if (virtual_operand_p (op))
5930 continue;
5932 if (get_iv (data, op))
5933 continue;
5935 n++;
5938 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5940 struct version_info *info = ver_info (data, j);
5942 if (info->inv_id && info->has_nonlin_use)
5943 n++;
5946 data->regs_used = n;
5947 if (dump_file && (dump_flags & TDF_DETAILS))
5948 fprintf (dump_file, " regs_used %d\n", n);
5950 if (dump_file && (dump_flags & TDF_DETAILS))
5952 fprintf (dump_file, " cost for size:\n");
5953 fprintf (dump_file, " ivs\tcost\n");
5954 for (j = 0; j <= 2 * target_avail_regs; j++)
5955 fprintf (dump_file, " %d\t%d\n", j,
5956 ivopts_global_cost_for_size (data, j));
5957 fprintf (dump_file, "\n");
5961 /* Returns true if A is a cheaper cost pair than B. */
5963 static bool
5964 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5966 int cmp;
5968 if (!a)
5969 return false;
5971 if (!b)
5972 return true;
5974 cmp = compare_costs (a->cost, b->cost);
5975 if (cmp < 0)
5976 return true;
5978 if (cmp > 0)
5979 return false;
5981 /* In case the costs are the same, prefer the cheaper candidate. */
5982 if (a->cand->cost < b->cand->cost)
5983 return true;
5985 return false;
5989 /* Returns the candidate by which USE is expressed in IVS. */
5991 static struct cost_pair *
5992 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5994 return ivs->cand_for_use[use->id];
5997 /* Computes the cost field of the IVS structure. */
5999 static void
6000 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6002 comp_cost cost = ivs->cand_use_cost;
6004 cost.cost += ivs->cand_cost;
6006 cost.cost += ivopts_global_cost_for_size (data,
6007 ivs->n_regs + ivs->num_used_inv_expr);
6009 ivs->cost = cost;
6012 /* Remove invariants in set INVS from set IVS. */
6014 static void
6015 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6017 bitmap_iterator bi;
6018 unsigned iid;
6020 if (!invs)
6021 return;
6023 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6025 ivs->n_invariant_uses[iid]--;
6026 if (ivs->n_invariant_uses[iid] == 0)
6027 ivs->n_regs--;
6031 /* Set USE not to be expressed by any candidate in IVS. */
6033 static void
6034 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6035 struct iv_use *use)
6037 unsigned uid = use->id, cid;
6038 struct cost_pair *cp;
6040 cp = ivs->cand_for_use[uid];
6041 if (!cp)
6042 return;
6043 cid = cp->cand->id;
6045 ivs->bad_uses++;
6046 ivs->cand_for_use[uid] = NULL;
6047 ivs->n_cand_uses[cid]--;
6049 if (ivs->n_cand_uses[cid] == 0)
6051 bitmap_clear_bit (ivs->cands, cid);
6052 /* Do not count the pseudocandidates. */
6053 if (cp->cand->iv)
6054 ivs->n_regs--;
6055 ivs->n_cands--;
6056 ivs->cand_cost -= cp->cand->cost;
6058 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6061 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6063 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6065 if (cp->inv_expr_id != -1)
6067 ivs->used_inv_expr[cp->inv_expr_id]--;
6068 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6069 ivs->num_used_inv_expr--;
6071 iv_ca_recount_cost (data, ivs);
6074 /* Add invariants in set INVS to set IVS. */
6076 static void
6077 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6079 bitmap_iterator bi;
6080 unsigned iid;
6082 if (!invs)
6083 return;
6085 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6087 ivs->n_invariant_uses[iid]++;
6088 if (ivs->n_invariant_uses[iid] == 1)
6089 ivs->n_regs++;
6093 /* Set cost pair for USE in set IVS to CP. */
6095 static void
6096 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6097 struct iv_use *use, struct cost_pair *cp)
6099 unsigned uid = use->id, cid;
6101 if (ivs->cand_for_use[uid] == cp)
6102 return;
6104 if (ivs->cand_for_use[uid])
6105 iv_ca_set_no_cp (data, ivs, use);
6107 if (cp)
6109 cid = cp->cand->id;
6111 ivs->bad_uses--;
6112 ivs->cand_for_use[uid] = cp;
6113 ivs->n_cand_uses[cid]++;
6114 if (ivs->n_cand_uses[cid] == 1)
6116 bitmap_set_bit (ivs->cands, cid);
6117 /* Do not count the pseudocandidates. */
6118 if (cp->cand->iv)
6119 ivs->n_regs++;
6120 ivs->n_cands++;
6121 ivs->cand_cost += cp->cand->cost;
6123 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6126 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6127 iv_ca_set_add_invariants (ivs, cp->depends_on);
6129 if (cp->inv_expr_id != -1)
6131 ivs->used_inv_expr[cp->inv_expr_id]++;
6132 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6133 ivs->num_used_inv_expr++;
6135 iv_ca_recount_cost (data, ivs);
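
/* Bookkeeping example, for illustration: when USE switches from a
   candidate used only here to one already used by another use,
   iv_ca_set_no_cp drops the old candidate's register and cand_cost,
   the new cost pair adds only cand_use_cost and the invariant counters,
   and iv_ca_recount_cost then re-evaluates the register-pressure term
   for the new set size.  */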
6139 /* Extend set IVS by expressing USE by some of the candidates in it
6140 if possible. Consider all important candidates if candidates in
6141 set IVS don't give any result. */
6143 static void
6144 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6145 struct iv_use *use)
6147 struct cost_pair *best_cp = NULL, *cp;
6148 bitmap_iterator bi;
6149 unsigned i;
6150 struct iv_cand *cand;
6152 gcc_assert (ivs->upto >= use->id);
6153 ivs->upto++;
6154 ivs->bad_uses++;
6156 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6158 cand = iv_cand (data, i);
6159 cp = get_use_iv_cost (data, use, cand);
6160 if (cheaper_cost_pair (cp, best_cp))
6161 best_cp = cp;
6164 if (best_cp == NULL)
6166 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6168 cand = iv_cand (data, i);
6169 cp = get_use_iv_cost (data, use, cand);
6170 if (cheaper_cost_pair (cp, best_cp))
6171 best_cp = cp;
6175 iv_ca_set_cp (data, ivs, use, best_cp);
6178 /* Get cost for assignment IVS. */
6180 static comp_cost
6181 iv_ca_cost (struct iv_ca *ivs)
6183 /* This was a conditional expression but it triggered a bug in
6184 Sun C 5.5. */
6185 if (ivs->bad_uses)
6186 return infinite_cost;
6187 else
6188 return ivs->cost;
6191 /* Returns true if all dependences of CP are among invariants in IVS. */
6193 static bool
6194 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6196 unsigned i;
6197 bitmap_iterator bi;
6199 if (!cp->depends_on)
6200 return true;
6202 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6204 if (ivs->n_invariant_uses[i] == 0)
6205 return false;
6208 return true;
6211 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
6212 it before NEXT_CHANGE. */
6214 static struct iv_ca_delta *
6215 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6216 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6218 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6220 change->use = use;
6221 change->old_cp = old_cp;
6222 change->new_cp = new_cp;
6223 change->next_change = next_change;
6225 return change;
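/* Note that deltas are built by prepending; e.g. the two calls (sketch)

     delta = iv_ca_delta_add (u1, a1, b1, NULL);
     delta = iv_ca_delta_add (u2, a2, b2, delta);

   leave the change for u2 first in the list, so changes are stored in
   reverse order of creation.  */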
6228 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6229 are rewritten. */
6231 static struct iv_ca_delta *
6232 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6234 struct iv_ca_delta *last;
6236 if (!l2)
6237 return l1;
6239 if (!l1)
6240 return l2;
6242 for (last = l1; last->next_change; last = last->next_change)
6243 continue;
6244 last->next_change = l2;
6246 return l1;
6249 /* Reverse the list of changes DELTA, forming its inverse. */
6251 static struct iv_ca_delta *
6252 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6254 struct iv_ca_delta *act, *next, *prev = NULL;
6256 for (act = delta; act; act = next)
6258 next = act->next_change;
6259 act->next_change = prev;
6260 prev = act;
6262 std::swap (act->old_cp, act->new_cp);
6265 return prev;
6268 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6269 reverted instead. */
6271 static void
6272 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6273 struct iv_ca_delta *delta, bool forward)
6275 struct cost_pair *from, *to;
6276 struct iv_ca_delta *act;
6278 if (!forward)
6279 delta = iv_ca_delta_reverse (delta);
6281 for (act = delta; act; act = act->next_change)
6283 from = act->old_cp;
6284 to = act->new_cp;
6285 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6286 iv_ca_set_cp (data, ivs, act->use, to);
6289 if (!forward)
6290 iv_ca_delta_reverse (delta);
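/* The typical trial-evaluation pattern built on this function (a
   sketch of how it is used throughout this file):

     iv_ca_delta_commit (data, ivs, delta, true);    - apply changes
     cost = iv_ca_cost (ivs);                        - measure
     iv_ca_delta_commit (data, ivs, delta, false);   - revert them

   Reverting works because iv_ca_delta_reverse both reverses the list
   and swaps old_cp with new_cp in every entry, and the second reversal
   at the end of this function restores DELTA to its original state.  */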
6293 /* Returns true if CAND is used in IVS. */
6295 static bool
6296 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6298 return ivs->n_cand_uses[cand->id] > 0;
6301 /* Returns number of induction variable candidates in the set IVS. */
6303 static unsigned
6304 iv_ca_n_cands (struct iv_ca *ivs)
6306 return ivs->n_cands;
6309 /* Free the list of changes DELTA. */
6311 static void
6312 iv_ca_delta_free (struct iv_ca_delta **delta)
6314 struct iv_ca_delta *act, *next;
6316 for (act = *delta; act; act = next)
6318 next = act->next_change;
6319 free (act);
6322 *delta = NULL;
6325 /* Allocates new iv candidates assignment. */
6327 static struct iv_ca *
6328 iv_ca_new (struct ivopts_data *data)
6330 struct iv_ca *nw = XNEW (struct iv_ca);
6332 nw->upto = 0;
6333 nw->bad_uses = 0;
6334 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6335 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6336 nw->cands = BITMAP_ALLOC (NULL);
6337 nw->n_cands = 0;
6338 nw->n_regs = 0;
6339 nw->cand_use_cost = no_cost;
6340 nw->cand_cost = 0;
6341 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6342 nw->cost = no_cost;
6343 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6344 nw->num_used_inv_expr = 0;
6346 return nw;
6349 /* Free memory occupied by the set IVS. */
6351 static void
6352 iv_ca_free (struct iv_ca **ivs)
6354 free ((*ivs)->cand_for_use);
6355 free ((*ivs)->n_cand_uses);
6356 BITMAP_FREE ((*ivs)->cands);
6357 free ((*ivs)->n_invariant_uses);
6358 free ((*ivs)->used_inv_expr);
6359 free (*ivs);
6360 *ivs = NULL;
6363 /* Dumps IVS to FILE. */
6365 static void
6366 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6368 const char *pref = " invariants ";
6369 unsigned i;
6370 comp_cost cost = iv_ca_cost (ivs);
6372 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6373 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6374 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6375 bitmap_print (file, ivs->cands, " candidates: ","\n");
6377 for (i = 0; i < ivs->upto; i++)
6379 struct iv_use *use = iv_use (data, i);
6380 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6381 if (cp)
6382 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6383 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6384 else
6385 fprintf (file, " use:%d --> ??\n", use->id);
6388 for (i = 1; i <= data->max_inv_id; i++)
6389 if (ivs->n_invariant_uses[i])
6391 fprintf (file, "%s%d", pref, i);
6392 pref = ", ";
6394 fprintf (file, "\n\n");
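/* For reference, the output of iv_ca_dump looks like this
   (hypothetical values):

     cost: 18 (complexity 2)
     cand_cost: 6
     cand_use_cost: 9 (complexity 2)
     candidates: 0 3
      use:0 --> iv_cand:0, cost=(4,1)
      use:1 --> iv_cand:3, cost=(5,1)
     invariants 1, 2  */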
6397 /* Try changing the candidate for each use in IVS to CAND. Return the cost
6398 of the new set, and store the differences in DELTA. The number of induction
6399 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6400 is true, the function tries to find a solution with minimal iv candidates. */
6402 static comp_cost
6403 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6404 struct iv_cand *cand, struct iv_ca_delta **delta,
6405 unsigned *n_ivs, bool min_ncand)
6407 unsigned i;
6408 comp_cost cost;
6409 struct iv_use *use;
6410 struct cost_pair *old_cp, *new_cp;
6412 *delta = NULL;
6413 for (i = 0; i < ivs->upto; i++)
6415 use = iv_use (data, i);
6416 old_cp = iv_ca_cand_for_use (ivs, use);
6418 if (old_cp
6419 && old_cp->cand == cand)
6420 continue;
6422 new_cp = get_use_iv_cost (data, use, cand);
6423 if (!new_cp)
6424 continue;
6426 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6427 continue;
6429 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6430 continue;
6432 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6435 iv_ca_delta_commit (data, ivs, *delta, true);
6436 cost = iv_ca_cost (ivs);
6437 if (n_ivs)
6438 *n_ivs = iv_ca_n_cands (ivs);
6439 iv_ca_delta_commit (data, ivs, *delta, false);
6441 return cost;
6444 /* Try narrowing set IVS by removing CAND. Return the cost of
6445 the new set and store the differences in DELTA. START is
6446 the candidate with which we start narrowing. */
6448 static comp_cost
6449 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6450 struct iv_cand *cand, struct iv_cand *start,
6451 struct iv_ca_delta **delta)
6453 unsigned i, ci;
6454 struct iv_use *use;
6455 struct cost_pair *old_cp, *new_cp, *cp;
6456 bitmap_iterator bi;
6457 struct iv_cand *cnd;
6458 comp_cost cost, best_cost, acost;
6460 *delta = NULL;
6461 for (i = 0; i < n_iv_uses (data); i++)
6463 use = iv_use (data, i);
6465 old_cp = iv_ca_cand_for_use (ivs, use);
6466 if (old_cp->cand != cand)
6467 continue;
6469 best_cost = iv_ca_cost (ivs);
6470 /* Start narrowing with START. */
6471 new_cp = get_use_iv_cost (data, use, start);
6473 if (data->consider_all_candidates)
6475 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6477 if (ci == cand->id || (start && ci == start->id))
6478 continue;
6480 cnd = iv_cand (data, ci);
6482 cp = get_use_iv_cost (data, use, cnd);
6483 if (!cp)
6484 continue;
6486 iv_ca_set_cp (data, ivs, use, cp);
6487 acost = iv_ca_cost (ivs);
6489 if (compare_costs (acost, best_cost) < 0)
6491 best_cost = acost;
6492 new_cp = cp;
6496 else
6498 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6500 if (ci == cand->id || (start && ci == start->id))
6501 continue;
6503 cnd = iv_cand (data, ci);
6505 cp = get_use_iv_cost (data, use, cnd);
6506 if (!cp)
6507 continue;
6509 iv_ca_set_cp (data, ivs, use, cp);
6510 acost = iv_ca_cost (ivs);
6512 if (compare_costs (acost, best_cost) < 0)
6514 best_cost = acost;
6515 new_cp = cp;
6519 /* Restore the old cp for this use. */
6520 iv_ca_set_cp (data, ivs, use, old_cp);
6522 if (!new_cp)
6524 iv_ca_delta_free (delta);
6525 return infinite_cost;
6528 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6531 iv_ca_delta_commit (data, ivs, *delta, true);
6532 cost = iv_ca_cost (ivs);
6533 iv_ca_delta_commit (data, ivs, *delta, false);
6535 return cost;
6538 /* Try optimizing the set of candidates IVS by removing candidates other
6539 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6540 differences in DELTA. */
6542 static comp_cost
6543 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6544 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6546 bitmap_iterator bi;
6547 struct iv_ca_delta *act_delta, *best_delta;
6548 unsigned i;
6549 comp_cost best_cost, acost;
6550 struct iv_cand *cand;
6552 best_delta = NULL;
6553 best_cost = iv_ca_cost (ivs);
6555 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6557 cand = iv_cand (data, i);
6559 if (cand == except_cand)
6560 continue;
6562 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6564 if (compare_costs (acost, best_cost) < 0)
6566 best_cost = acost;
6567 iv_ca_delta_free (&best_delta);
6568 best_delta = act_delta;
6570 else
6571 iv_ca_delta_free (&act_delta);
6574 if (!best_delta)
6576 *delta = NULL;
6577 return best_cost;
6580 /* Recurse to possibly remove other unnecessary ivs. */
6581 iv_ca_delta_commit (data, ivs, best_delta, true);
6582 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6583 iv_ca_delta_commit (data, ivs, best_delta, false);
6584 *delta = iv_ca_delta_join (best_delta, *delta);
6585 return best_cost;
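/* For instance (schematic): with candidates {c1, c2, c3} in IVS, the
   first pass may find that all uses of c2 can be moved to c1 at a
   lower total cost and remove c2; the recursive call then retries on
   the narrowed set, where the recomputed costs may now allow c3 to be
   removed as well.  */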
6588 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6589 cheaper local cost for USE than BEST_CP. If so, return a pointer
6590 to the corresponding cost_pair; otherwise return BEST_CP. */
6592 static struct cost_pair*
6593 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6594 unsigned int cand_idx, struct iv_cand *old_cand,
6595 struct cost_pair *best_cp)
6597 struct iv_cand *cand;
6598 struct cost_pair *cp;
6600 gcc_assert (old_cand != NULL && best_cp != NULL);
6601 if (cand_idx == old_cand->id)
6602 return best_cp;
6604 cand = iv_cand (data, cand_idx);
6605 cp = get_use_iv_cost (data, use, cand);
6606 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6607 return cp;
6609 return best_cp;
6612 /* Try breaking the locally optimal fixed point for IVS by replacing
6613 candidates that are used by more than one iv use. For each such
6614 candidate, this function tries to represent the iv uses under it using
6615 other candidates with lower local cost, then tries to prune the new set.
6616 If the new set has a lower cost, it returns the new cost after recording
6617 the candidate replacement in the list DELTA. */
6619 static comp_cost
6620 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6621 struct iv_ca_delta **delta)
6623 bitmap_iterator bi, bj;
6624 unsigned int i, j, k;
6625 struct iv_use *use;
6626 struct iv_cand *cand;
6627 comp_cost orig_cost, acost;
6628 struct iv_ca_delta *act_delta, *tmp_delta;
6629 struct cost_pair *old_cp, *best_cp = NULL;
6631 *delta = NULL;
6632 orig_cost = iv_ca_cost (ivs);
6634 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6636 if (ivs->n_cand_uses[i] == 1
6637 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6638 continue;
6640 cand = iv_cand (data, i);
6642 act_delta = NULL;
6643 /* Represent uses under current candidate using other ones with
6644 lower local cost. */
6645 for (j = 0; j < ivs->upto; j++)
6647 use = iv_use (data, j);
6648 old_cp = iv_ca_cand_for_use (ivs, use);
6650 if (old_cp->cand != cand)
6651 continue;
6653 best_cp = old_cp;
6654 if (data->consider_all_candidates)
6655 for (k = 0; k < n_iv_cands (data); k++)
6656 best_cp = cheaper_cost_with_cand (data, use, k,
6657 old_cp->cand, best_cp);
6658 else
6659 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6660 best_cp = cheaper_cost_with_cand (data, use, k,
6661 old_cp->cand, best_cp);
6663 if (best_cp == old_cp)
6664 continue;
6666 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6668 /* No need for further pruning. */
6669 if (!act_delta)
6670 continue;
6672 /* Prune the new candidate set. */
6673 iv_ca_delta_commit (data, ivs, act_delta, true);
6674 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6675 iv_ca_delta_commit (data, ivs, act_delta, false);
6676 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6678 if (compare_costs (acost, orig_cost) < 0)
6680 *delta = act_delta;
6681 return acost;
6683 else
6684 iv_ca_delta_free (&act_delta);
6687 return orig_cost;
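/* Schematic example of the replacement above: suppose candidate c is
   used by uses u1 and u2, each of which has some other candidate with
   a cheaper local cost. The loop records a delta moving u1 and u2 off
   of c, after which c has no uses left and the pruning step can remove
   it; if the pruned set is cheaper overall than ORIG_COST, the combined
   delta is returned.  */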
6690 /* Tries to extend the set IVS in the best possible way in order
6691 to express USE. If ORIGINALP is true, prefer candidates from
6692 the original set of IVs; otherwise favor important candidates not
6693 based on any memory object. */
6695 static bool
6696 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6697 struct iv_use *use, bool originalp)
6699 comp_cost best_cost, act_cost;
6700 unsigned i;
6701 bitmap_iterator bi;
6702 struct iv_cand *cand;
6703 struct iv_ca_delta *best_delta = NULL, *act_delta;
6704 struct cost_pair *cp;
6706 iv_ca_add_use (data, ivs, use);
6707 best_cost = iv_ca_cost (ivs);
6708 cp = iv_ca_cand_for_use (ivs, use);
6709 if (cp)
6711 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6712 iv_ca_set_no_cp (data, ivs, use);
6715 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise,
6716 first try important candidates not based on any memory object; only if
6717 this fails, try the specific ones. Rationale -- in loops with many
6718 variables the best choice is often to use just one generic biv. If we
6719 added many ivs specific to the uses here, the optimization algorithm would
6720 later be likely to get stuck in a local minimum, thus causing us to create
6721 too many ivs. Going from few ivs to more seems more likely to be
6722 successful -- starting from few ivs, replacing an expensive use by a
6723 specific iv should always be a win. */
6724 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6726 cand = iv_cand (data, i);
6728 if (originalp && cand->pos != IP_ORIGINAL
6729 continue;
6731 if (!originalp && cand->iv->base_object != NULL_TREE)
6732 continue;
6734 if (iv_ca_cand_used_p (ivs, cand))
6735 continue;
6737 cp = get_use_iv_cost (data, use, cand);
6738 if (!cp)
6739 continue;
6741 iv_ca_set_cp (data, ivs, use, cp);
6742 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6743 true);
6744 iv_ca_set_no_cp (data, ivs, use);
6745 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6747 if (compare_costs (act_cost, best_cost) < 0)
6749 best_cost = act_cost;
6751 iv_ca_delta_free (&best_delta);
6752 best_delta = act_delta;
6754 else
6755 iv_ca_delta_free (&act_delta);
6758 if (infinite_cost_p (best_cost))
6760 for (i = 0; i < use->n_map_members; i++)
6762 cp = use->cost_map + i;
6763 cand = cp->cand;
6764 if (!cand)
6765 continue;
6767 /* Already tried this. */
6768 if (cand->important)
6769 {
6770 if (originalp && cand->pos == IP_ORIGINAL)
6771 continue;
6772 if (!originalp && cand->iv->base_object == NULL_TREE)
6773 continue;
6774 }
6776 if (iv_ca_cand_used_p (ivs, cand))
6777 continue;
6779 act_delta = NULL;
6780 iv_ca_set_cp (data, ivs, use, cp);
6781 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6782 iv_ca_set_no_cp (data, ivs, use);
6783 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6784 cp, act_delta);
6786 if (compare_costs (act_cost, best_cost) < 0)
6788 best_cost = act_cost;
6790 if (best_delta)
6791 iv_ca_delta_free (&best_delta);
6792 best_delta = act_delta;
6794 else
6795 iv_ca_delta_free (&act_delta);
6799 iv_ca_delta_commit (data, ivs, best_delta, true);
6800 iv_ca_delta_free (&best_delta);
6802 return !infinite_cost_p (best_cost);
6805 /* Finds an initial assignment of candidates to uses. */
6807 static struct iv_ca *
6808 get_initial_solution (struct ivopts_data *data, bool originalp)
6810 struct iv_ca *ivs = iv_ca_new (data);
6811 unsigned i;
6813 for (i = 0; i < n_iv_uses (data); i++)
6814 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6816 iv_ca_free (&ivs);
6817 return NULL;
6820 return ivs;
6823 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6824 points to a bool variable; if it is true, this function tries to break
6825 the locally optimal fixed point by replacing candidates in IVS. */
6827 static bool
6828 try_improve_iv_set (struct ivopts_data *data,
6829 struct iv_ca *ivs, bool *try_replace_p)
6831 unsigned i, n_ivs;
6832 comp_cost acost, best_cost = iv_ca_cost (ivs);
6833 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6834 struct iv_cand *cand;
6836 /* Try extending the set of induction variables by one. */
6837 for (i = 0; i < n_iv_cands (data); i++)
6839 cand = iv_cand (data, i);
6841 if (iv_ca_cand_used_p (ivs, cand))
6842 continue;
6844 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6845 if (!act_delta)
6846 continue;
6848 /* If we successfully added the candidate and the set is small enough,
6849 try optimizing it by removing other candidates. */
6850 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6852 iv_ca_delta_commit (data, ivs, act_delta, true);
6853 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6854 iv_ca_delta_commit (data, ivs, act_delta, false);
6855 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6858 if (compare_costs (acost, best_cost) < 0)
6860 best_cost = acost;
6861 iv_ca_delta_free (&best_delta);
6862 best_delta = act_delta;
6864 else
6865 iv_ca_delta_free (&act_delta);
6868 if (!best_delta)
6870 /* Try removing the candidates from the set instead. */
6871 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6873 if (!best_delta && *try_replace_p)
6875 *try_replace_p = false;
6876 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6877 so that it can handle cases in which loops have many variables
6878 but the best choice is often to use only one general biv. One
6879 weakness is that it can't handle the opposite cases, in which different
6880 candidates should be chosen with respect to each use. To solve
6881 this problem, we replace candidates in the manner described in the
6882 comments of iv_ca_replace, thus giving the general algorithm a chance
6883 to break the locally optimal fixed point in these cases. */
6884 best_cost = iv_ca_replace (data, ivs, &best_delta);
6887 if (!best_delta)
6888 return false;
6891 iv_ca_delta_commit (data, ivs, best_delta, true);
6892 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6893 iv_ca_delta_free (&best_delta);
6894 return true;
6897 /* Attempts to find the optimal set of induction variables. We use a simple
6898 greedy heuristic: we try to replace at most one candidate in the selected
6899 solution and remove the unused ivs while this improves the cost. */
6901 static struct iv_ca *
6902 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6904 struct iv_ca *set;
6905 bool try_replace_p = true;
6907 /* Get the initial solution. */
6908 set = get_initial_solution (data, originalp);
6909 if (!set)
6911 if (dump_file && (dump_flags & TDF_DETAILS))
6912 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6913 return NULL;
6916 if (dump_file && (dump_flags & TDF_DETAILS))
6918 fprintf (dump_file, "Initial set of candidates:\n");
6919 iv_ca_dump (data, dump_file, set);
6922 while (try_improve_iv_set (data, set, &try_replace_p))
6924 if (dump_file && (dump_flags & TDF_DETAILS))
6926 fprintf (dump_file, "Improved to:\n");
6927 iv_ca_dump (data, dump_file, set);
6931 return set;
6934 static struct iv_ca *
6935 find_optimal_iv_set (struct ivopts_data *data)
6937 unsigned i;
6938 struct iv_ca *set, *origset;
6939 struct iv_use *use;
6940 comp_cost cost, origcost;
6942 /* Determine the cost based on a strategy that starts with the original IVs,
6943 then try again using a strategy that prefers candidates not based
6944 on any IVs. */
6945 origset = find_optimal_iv_set_1 (data, true);
6946 set = find_optimal_iv_set_1 (data, false);
6948 if (!origset && !set)
6949 return NULL;
6951 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6952 cost = set ? iv_ca_cost (set) : infinite_cost;
6954 if (dump_file && (dump_flags & TDF_DETAILS))
6956 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6957 origcost.cost, origcost.complexity);
6958 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6959 cost.cost, cost.complexity);
6962 /* Choose the one with the best cost. */
6963 if (compare_costs (origcost, cost) <= 0)
6965 if (set)
6966 iv_ca_free (&set);
6967 set = origset;
6969 else if (origset)
6970 iv_ca_free (&origset);
6972 for (i = 0; i < n_iv_uses (data); i++)
6974 use = iv_use (data, i);
6975 use->selected = iv_ca_cand_for_use (set, use)->cand;
6978 return set;
6981 /* Creates a new induction variable corresponding to CAND. */
6983 static void
6984 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6986 gimple_stmt_iterator incr_pos;
6987 tree base;
6988 bool after = false;
6990 if (!cand->iv)
6991 return;
6993 switch (cand->pos)
6995 case IP_NORMAL:
6996 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6997 break;
6999 case IP_END:
7000 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7001 after = true;
7002 break;
7004 case IP_AFTER_USE:
7005 after = true;
7006 /* fall through */
7007 case IP_BEFORE_USE:
7008 incr_pos = gsi_for_stmt (cand->incremented_at);
7009 break;
7011 case IP_ORIGINAL:
7012 /* Mark that the iv is preserved. */
7013 name_info (data, cand->var_before)->preserve_biv = true;
7014 name_info (data, cand->var_after)->preserve_biv = true;
7016 /* Rewrite the increment so that it uses var_before directly. */
7017 find_interesting_uses_op (data, cand->var_after)->selected = cand;
7018 return;
7021 gimple_add_tmp_var (cand->var_before);
7023 base = unshare_expr (cand->iv->base);
7025 create_iv (base, unshare_expr (cand->iv->step),
7026 cand->var_before, data->current_loop,
7027 &incr_pos, after, &cand->var_before, &cand->var_after);
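/* Rough sketch of where the increment lands for the positions handled
   above (GIMPLE-like, schematic):

     IP_NORMAL                           IP_END
       loop:                               loop:
         ... uses of var_before ...          ... uses of var_before ...
         var_after = var_before + step       if (test) goto loop;
         if (test) goto loop;                var_after = var_before + step

   IP_BEFORE_USE and IP_AFTER_USE instead place the increment
   immediately around CAND->incremented_at.  */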
7030 /* Creates new induction variables described in SET. */
7032 static void
7033 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7035 unsigned i;
7036 struct iv_cand *cand;
7037 bitmap_iterator bi;
7039 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7041 cand = iv_cand (data, i);
7042 create_new_iv (data, cand);
7045 if (dump_file && (dump_flags & TDF_DETAILS))
7047 fprintf (dump_file, "Selected IV set for loop %d",
7048 data->current_loop->num);
7049 if (data->loop_loc != UNKNOWN_LOCATION)
7050 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7051 LOCATION_LINE (data->loop_loc));
7052 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7053 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7055 cand = iv_cand (data, i);
7056 dump_cand (dump_file, cand);
7058 fprintf (dump_file, "\n");
7062 /* Rewrites USE (definition of iv used in a nonlinear expression)
7063 using candidate CAND. */
7065 static void
7066 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7067 struct iv_use *use, struct iv_cand *cand)
7069 tree comp;
7070 tree op, tgt;
7071 gassign *ass;
7072 gimple_stmt_iterator bsi;
7074 /* An important special case -- if we are asked to express value of
7075 the original iv by itself, just exit; there is no need to
7076 introduce a new computation (that might also need casting the
7077 variable to unsigned and back). */
7078 if (cand->pos == IP_ORIGINAL
7079 && cand->incremented_at == use->stmt)
7081 enum tree_code stmt_code;
7083 gcc_assert (is_gimple_assign (use->stmt));
7084 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7086 /* Check whether we may leave the computation unchanged.
7087 This is the case only if it does not rely on other
7088 computations in the loop -- otherwise, the computation
7089 we rely upon may be removed in remove_unused_ivs,
7090 thus leading to an ICE. */
7091 stmt_code = gimple_assign_rhs_code (use->stmt);
7092 if (stmt_code == PLUS_EXPR
7093 || stmt_code == MINUS_EXPR
7094 || stmt_code == POINTER_PLUS_EXPR)
7096 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7097 op = gimple_assign_rhs2 (use->stmt);
7098 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7099 op = gimple_assign_rhs1 (use->stmt);
7100 else
7101 op = NULL_TREE;
7103 else
7104 op = NULL_TREE;
7106 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7107 return;
7110 comp = get_computation (data->current_loop, use, cand);
7111 gcc_assert (comp != NULL_TREE);
7113 switch (gimple_code (use->stmt))
7115 case GIMPLE_PHI:
7116 tgt = PHI_RESULT (use->stmt);
7118 /* If we should keep the biv, do not replace it. */
7119 if (name_info (data, tgt)->preserve_biv)
7120 return;
7122 bsi = gsi_after_labels (gimple_bb (use->stmt));
7123 break;
7125 case GIMPLE_ASSIGN:
7126 tgt = gimple_assign_lhs (use->stmt);
7127 bsi = gsi_for_stmt (use->stmt);
7128 break;
7130 default:
7131 gcc_unreachable ();
7134 if (!valid_gimple_rhs_p (comp)
7135 || (gimple_code (use->stmt) != GIMPLE_PHI
7136 /* We can't allow re-allocating the stmt as it might be pointed
7137 to still. */
7138 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7139 >= gimple_num_ops (gsi_stmt (bsi)))))
7141 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7142 true, GSI_SAME_STMT);
7143 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7145 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7146 /* As this isn't a plain copy we have to reset alignment
7147 information. */
7148 if (SSA_NAME_PTR_INFO (comp))
7149 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7153 if (gimple_code (use->stmt) == GIMPLE_PHI)
7155 ass = gimple_build_assign (tgt, comp);
7156 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7158 bsi = gsi_for_stmt (use->stmt);
7159 remove_phi_node (&bsi, false);
7161 else
7163 gimple_assign_set_rhs_from_tree (&bsi, comp);
7164 use->stmt = gsi_stmt (bsi);
7168 /* Performs a peephole optimization to reorder the iv update statement with
7169 a mem ref to enable instruction combining in later phases. The mem ref uses
7170 the iv value before the update, so the reordering transformation requires
7171 adjustment of the offset. CAND is the selected IV_CAND.
7173 Example:
7175 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7176 iv2 = iv1 + 1;
7178 if (t < val) (1)
7179 goto L;
7180 goto Head;
7183 directly propagating t over to (1) would introduce an overlapping live range
7184 and thus increase register pressure. This peephole transforms it into:
7187 iv2 = iv1 + 1;
7188 t = MEM_REF (base, iv2, 8, 8);
7189 if (t < val)
7190 goto L;
7191 goto Head;
7194 static void
7195 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7197 tree var_after;
7198 gimple *iv_update, *stmt;
7199 basic_block bb;
7200 gimple_stmt_iterator gsi, gsi_iv;
7202 if (cand->pos != IP_NORMAL)
7203 return;
7205 var_after = cand->var_after;
7206 iv_update = SSA_NAME_DEF_STMT (var_after);
7208 bb = gimple_bb (iv_update);
7209 gsi = gsi_last_nondebug_bb (bb);
7210 stmt = gsi_stmt (gsi);
7212 /* Only handle conditional statement for now. */
7213 if (gimple_code (stmt) != GIMPLE_COND)
7214 return;
7216 gsi_prev_nondebug (&gsi);
7217 stmt = gsi_stmt (gsi);
7218 if (stmt != iv_update)
7219 return;
7221 gsi_prev_nondebug (&gsi);
7222 if (gsi_end_p (gsi))
7223 return;
7225 stmt = gsi_stmt (gsi);
7226 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7227 return;
7229 if (stmt != use->stmt)
7230 return;
7232 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7233 return;
7235 if (dump_file && (dump_flags & TDF_DETAILS))
7237 fprintf (dump_file, "Reordering \n");
7238 print_gimple_stmt (dump_file, iv_update, 0, 0);
7239 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7240 fprintf (dump_file, "\n");
7243 gsi = gsi_for_stmt (use->stmt);
7244 gsi_iv = gsi_for_stmt (iv_update);
7245 gsi_move_before (&gsi_iv, &gsi);
7247 cand->pos = IP_BEFORE_USE;
7248 cand->incremented_at = use->stmt;
7251 /* Rewrites USE (address that is an iv) using candidate CAND. */
7253 static void
7254 rewrite_use_address_1 (struct ivopts_data *data,
7255 struct iv_use *use, struct iv_cand *cand)
7257 aff_tree aff;
7258 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7259 tree base_hint = NULL_TREE;
7260 tree ref, iv;
7261 bool ok;
7263 adjust_iv_update_pos (cand, use);
7264 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7265 gcc_assert (ok);
7266 unshare_aff_combination (&aff);
7268 /* To avoid undefined overflow problems, all IV candidates use unsigned
7269 integer types. The drawback is that this makes it impossible for
7270 create_mem_ref to distinguish an IV that is based on a memory object
7271 from one that represents simply an offset.
7273 To work around this problem, we pass a hint to create_mem_ref that
7274 indicates which variable (if any) in aff is an IV based on a memory
7275 object. Note that we only consider the candidate. If this is not
7276 based on an object, the base of the reference is in some subexpression
7277 of the use -- but these will use pointer types, so they are recognized
7278 by the create_mem_ref heuristics anyway. */
7279 if (cand->iv->base_object)
7280 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7282 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7283 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7284 reference_alias_ptr_type (*use->op_p),
7285 iv, base_hint, data->speed);
7286 copy_ref_info (ref, *use->op_p);
7287 *use->op_p = ref;
7290 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7291 first use of a group, rewrites sub uses in the group too. */
7293 static void
7294 rewrite_use_address (struct ivopts_data *data,
7295 struct iv_use *use, struct iv_cand *cand)
7297 struct iv_use *next;
7299 gcc_assert (use->sub_id == 0);
7300 rewrite_use_address_1 (data, use, cand);
7301 update_stmt (use->stmt);
7303 for (next = use->next; next != NULL; next = next->next)
7305 rewrite_use_address_1 (data, next, cand);
7306 update_stmt (next->stmt);
7309 return;
7312 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7313 candidate CAND. */
7315 static void
7316 rewrite_use_compare (struct ivopts_data *data,
7317 struct iv_use *use, struct iv_cand *cand)
7319 tree comp, *var_p, op, bound;
7320 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7321 enum tree_code compare;
7322 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7323 bool ok;
7325 bound = cp->value;
7326 if (bound)
7328 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7329 tree var_type = TREE_TYPE (var);
7330 gimple_seq stmts;
7332 if (dump_file && (dump_flags & TDF_DETAILS))
7334 fprintf (dump_file, "Replacing exit test: ");
7335 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7337 compare = cp->comp;
7338 bound = unshare_expr (fold_convert (var_type, bound));
7339 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7340 if (stmts)
7341 gsi_insert_seq_on_edge_immediate (
7342 loop_preheader_edge (data->current_loop),
7343 stmts);
7345 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7346 gimple_cond_set_lhs (cond_stmt, var);
7347 gimple_cond_set_code (cond_stmt, compare);
7348 gimple_cond_set_rhs (cond_stmt, op);
7349 return;
7352 /* The induction variable elimination failed; just express the original
7353 giv. */
7354 comp = get_computation (data->current_loop, use, cand);
7355 gcc_assert (comp != NULL_TREE);
7357 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7358 gcc_assert (ok);
7360 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7361 true, GSI_SAME_STMT);
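/* For example (schematic), when iv elimination succeeds with a
   down-counting candidate, an exit test such as

     if (i_5 < 100)

   is replaced by comparing the candidate against the computed bound:

     if (ivtmp_9 != 0)

   Otherwise the original comparison operand is simply re-expressed in
   terms of CAND by the fallback code above.  */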
7364 /* Rewrites USE using candidate CAND. */
7366 static void
7367 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7369 switch (use->type)
7371 case USE_NONLINEAR_EXPR:
7372 rewrite_use_nonlinear_expr (data, use, cand);
7373 break;
7375 case USE_ADDRESS:
7376 rewrite_use_address (data, use, cand);
7377 break;
7379 case USE_COMPARE:
7380 rewrite_use_compare (data, use, cand);
7381 break;
7383 default:
7384 gcc_unreachable ();
7387 update_stmt (use->stmt);
7390 /* Rewrite the uses using the selected induction variables. */
7392 static void
7393 rewrite_uses (struct ivopts_data *data)
7395 unsigned i;
7396 struct iv_cand *cand;
7397 struct iv_use *use;
7399 for (i = 0; i < n_iv_uses (data); i++)
7401 use = iv_use (data, i);
7402 cand = use->selected;
7403 gcc_assert (cand);
7405 rewrite_use (data, use, cand);
7409 /* Removes the ivs that are not used after rewriting. */
7411 static void
7412 remove_unused_ivs (struct ivopts_data *data)
7414 unsigned j;
7415 bitmap_iterator bi;
7416 bitmap toremove = BITMAP_ALLOC (NULL);
7418 /* Figure out an order in which to release SSA DEFs so that we don't
7419 release something that we'd have to propagate into a debug stmt
7420 afterwards. */
7421 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7423 struct version_info *info;
7425 info = ver_info (data, j);
7426 if (info->iv
7427 && !integer_zerop (info->iv->step)
7428 && !info->inv_id
7429 && !info->iv->have_use_for
7430 && !info->preserve_biv)
7432 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7434 tree def = info->iv->ssa_name;
7436 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7438 imm_use_iterator imm_iter;
7439 use_operand_p use_p;
7440 gimple *stmt;
7441 int count = 0;
7443 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7445 if (!gimple_debug_bind_p (stmt))
7446 continue;
7448 /* We just want to determine whether to do nothing
7449 (count == 0), to substitute the computed
7450 expression into a single use of the SSA DEF by
7451 itself (count == 1), or to use a debug temp
7452 because the SSA DEF is used multiple times or as
7453 part of a larger expression (count > 1). */
7454 count++;
7455 if (gimple_debug_bind_get_value (stmt) != def)
7456 count++;
7458 if (count > 1)
7459 BREAK_FROM_IMM_USE_STMT (imm_iter);
7462 if (!count)
7463 continue;
7465 struct iv_use dummy_use;
7466 struct iv_cand *best_cand = NULL, *cand;
7467 unsigned i, best_pref = 0, cand_pref;
7469 memset (&dummy_use, 0, sizeof (dummy_use));
7470 dummy_use.iv = info->iv;
7471 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7473 cand = iv_use (data, i)->selected;
7474 if (cand == best_cand)
7475 continue;
7476 cand_pref = operand_equal_p (cand->iv->step,
7477 info->iv->step, 0)
7478 ? 4 : 0;
7479 cand_pref
7480 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7481 == TYPE_MODE (TREE_TYPE (info->iv->base))
7482 ? 2 : 0;
7483 cand_pref
7484 += TREE_CODE (cand->iv->base) == INTEGER_CST
7485 ? 1 : 0;
7486 if (best_cand == NULL || best_pref < cand_pref)
7488 best_cand = cand;
7489 best_pref = cand_pref;
7493 if (!best_cand)
7494 continue;
7496 tree comp = get_computation_at (data->current_loop,
7497 &dummy_use, best_cand,
7498 SSA_NAME_DEF_STMT (def));
7499 if (!comp)
7500 continue;
7502 if (count > 1)
7504 tree vexpr = make_node (DEBUG_EXPR_DECL);
7505 DECL_ARTIFICIAL (vexpr) = 1;
7506 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7507 if (SSA_NAME_VAR (def))
7508 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7509 else
7510 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7511 gdebug *def_temp
7512 = gimple_build_debug_bind (vexpr, comp, NULL);
7513 gimple_stmt_iterator gsi;
7515 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7516 gsi = gsi_after_labels (gimple_bb
7517 (SSA_NAME_DEF_STMT (def)));
7518 else
7519 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7521 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7522 comp = vexpr;
7525 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7527 if (!gimple_debug_bind_p (stmt))
7528 continue;
7530 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7531 SET_USE (use_p, comp);
7533 update_stmt (stmt);
7539 release_defs_bitset (toremove);
7541 BITMAP_FREE (toremove);
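/* Schematic example of the debug-stmt handling above: if the unused
   iv i_1 still appears in debug binds, its value is re-expressed in
   terms of the best surviving candidate; with multiple debug uses
   (count > 1) a debug temporary is introduced first:

     # DEBUG D#1 => ivtmp_3 * 4 + base_2
     # DEBUG i => D#1

   so that the computed expression is emitted only once.  */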
7544 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7545 for hash_map::traverse. */
7547 bool
7548 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7550 free (value);
7551 return true;
7554 /* Frees data allocated by the optimization of a single loop. */
7556 static void
7557 free_loop_data (struct ivopts_data *data)
7559 unsigned i, j;
7560 bitmap_iterator bi;
7561 tree obj;
7563 if (data->niters)
7565 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7566 delete data->niters;
7567 data->niters = NULL;
7570 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7572 struct version_info *info;
7574 info = ver_info (data, i);
7575 info->iv = NULL;
7576 info->has_nonlin_use = false;
7577 info->preserve_biv = false;
7578 info->inv_id = 0;
7580 bitmap_clear (data->relevant);
7581 bitmap_clear (data->important_candidates);
7583 for (i = 0; i < n_iv_uses (data); i++)
7585 struct iv_use *use = iv_use (data, i);
7586 struct iv_use *pre = use, *sub = use->next;
7588 while (sub)
7590 gcc_assert (sub->related_cands == NULL);
7591 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7593 pre = sub;
7594 sub = sub->next;
7595 free (pre);
7598 BITMAP_FREE (use->related_cands);
7599 for (j = 0; j < use->n_map_members; j++)
7600 if (use->cost_map[j].depends_on)
7601 BITMAP_FREE (use->cost_map[j].depends_on);
7602 free (use->cost_map);
7603 free (use);
7605 data->iv_uses.truncate (0);
7607 for (i = 0; i < n_iv_cands (data); i++)
7609 struct iv_cand *cand = iv_cand (data, i);
7611 if (cand->depends_on)
7612 BITMAP_FREE (cand->depends_on);
7613 free (cand);
7615 data->iv_candidates.truncate (0);
7617 if (data->version_info_size < num_ssa_names)
7619 data->version_info_size = 2 * num_ssa_names;
7620 free (data->version_info);
7621 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7624 data->max_inv_id = 0;
7626 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7627 SET_DECL_RTL (obj, NULL_RTX);
7629 decl_rtl_to_reset.truncate (0);
7631 data->inv_expr_tab->empty ();
7632 data->inv_expr_id = 0;
7634 data->iv_common_cand_tab->empty ();
7635 data->iv_common_cands.truncate (0);
7638 /* Finalizes the data structures used by the iv optimization pass,
7639 given by DATA. */
7641 static void
7642 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7644 free_loop_data (data);
7645 free (data->version_info);
7646 BITMAP_FREE (data->relevant);
7647 BITMAP_FREE (data->important_candidates);
7649 decl_rtl_to_reset.release ();
7650 data->iv_uses.release ();
7651 data->iv_candidates.release ();
7652 delete data->inv_expr_tab;
7653 data->inv_expr_tab = NULL;
7654 free_affine_expand_cache (&data->name_expansion_cache);
7655 delete data->iv_common_cand_tab;
7656 data->iv_common_cand_tab = NULL;
7657 data->iv_common_cands.release ();
7658 obstack_free (&data->iv_obstack, NULL);
7661 /* Returns true if the loop body BODY includes any function calls. */
7663 static bool
7664 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7666 gimple_stmt_iterator gsi;
7667 unsigned i;
7669 for (i = 0; i < num_nodes; i++)
7670 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7672 gimple *stmt = gsi_stmt (gsi);
7673 if (is_gimple_call (stmt)
7674 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7675 return true;
7677 return false;
7680 /* Optimizes the LOOP. Returns true if anything changed. */
7682 static bool
7683 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7685 bool changed = false;
7686 struct iv_ca *iv_ca;
7687 edge exit = single_dom_exit (loop);
7688 basic_block *body;
7690 gcc_assert (!data->niters);
7691 data->current_loop = loop;
7692 data->loop_loc = find_loop_location (loop);
7693 data->speed = optimize_loop_for_speed_p (loop);
7695 if (dump_file && (dump_flags & TDF_DETAILS))
7697 fprintf (dump_file, "Processing loop %d", loop->num);
7698 if (data->loop_loc != UNKNOWN_LOCATION)
7699 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7700 LOCATION_LINE (data->loop_loc));
7701 fprintf (dump_file, "\n");
7703 if (exit)
7705 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7706 exit->src->index, exit->dest->index);
7707 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7708 fprintf (dump_file, "\n");
7711 fprintf (dump_file, "\n");
7714 body = get_loop_body (loop);
7715 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7716 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7717 free (body);
7719 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7721 /* For each ssa name determines whether it behaves as an induction variable
7722 in some loop. */
7723 if (!find_induction_variables (data))
7724 goto finish;
7726 /* Finds interesting uses (item 1). */
7727 find_interesting_uses (data);
7728 group_address_uses (data);
7729 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7730 goto finish;
7732 /* Finds candidates for the induction variables (item 2). */
7733 find_iv_candidates (data);
7735 /* Calculates the costs (item 3, part 1). */
7736 determine_iv_costs (data);
7737 determine_use_iv_costs (data);
7738 determine_set_costs (data);
7740 /* Find the optimal set of induction variables (item 3, part 2). */
7741 iv_ca = find_optimal_iv_set (data);
7742 if (!iv_ca)
7743 goto finish;
7744 changed = true;
7746 /* Create the new induction variables (item 4, part 1). */
7747 create_new_ivs (data, iv_ca);
7748 iv_ca_free (&iv_ca);
7750 /* Rewrite the uses (item 4, part 2). */
7751 rewrite_uses (data);
7753 /* Remove the ivs that are unused after rewriting. */
7754 remove_unused_ivs (data);
7756 /* We have changed the structure of induction variables; it might happen
7757 that definitions in the scev database refer to some of them that were
7758 eliminated. */
7759 scev_reset ();
7761 finish:
7762 free_loop_data (data);
7764 return changed;
7767 /* Main entry point. Optimizes induction variables in loops. */
7769 void
7770 tree_ssa_iv_optimize (void)
7772 struct loop *loop;
7773 struct ivopts_data data;
7775 tree_ssa_iv_optimize_init (&data);
7777 /* Optimize the loops starting with the innermost ones. */
7778 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7780 if (dump_file && (dump_flags & TDF_DETAILS))
7781 flow_loop_dump (loop, dump_file, NULL, 1);
7783 tree_ssa_iv_optimize_loop (&data, loop);
7786 tree_ssa_iv_optimize_finalize (&data);