2015-06-25 Zhouyi Zhou <yizhouzhou@ict.ac.cn>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob cab5acfc8d5b106ffdf5bc244995ee8fe2773be2
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
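/* For illustration (a made-up example, not part of this file's logic):
   given

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction can replace the address computation &a + 4 * i
   hidden in a[i] by a pointer induction variable:

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   and induction variable elimination can then remove I entirely,
   comparing P against the precomputed bound &a[n].  */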
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
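/* For instance (illustrative numbers only; the real values come from the
   target hooks): expressing a use as [base] might yield
   {cost = 0, complexity = 0}, while expressing it as [base + 8 * iv]
   might yield {cost = 1, complexity = 2}.  The cost field is what is
   minimized; complexity is meant to break ties between otherwise
   equally cheap alternatives.  */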
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned sub_id;      /* The id of the sub use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */

  struct iv_use *next;  /* The next sub use.  */
  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
        {
          dump_use (file, use);
          use = use->next;
        }
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
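/* A few illustrative inputs and results (hypothetical trees, not from a
   real dump): determine_base_object (&a[i]) returns &a;
   determine_base_object ((char *) p + 16) recurses through the cast and
   the POINTER_PLUS_EXPR and ends up returning P itself (converted to
   ptr_type_node) as the base object; and a plain integer constant such
   as 0x1000 yields NULL_TREE, since it does not point into any
   object.  */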
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
        bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
        continue;

      if (integer_zerop (iv.step))
        continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
         Ideally we should stop at any ssa var, but since that's expensive
         and unusual to happen, we just do it on the first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
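/* For instance (an illustrative loop, not a testcase from the sources):
   in

     i_1 = PHI <0 (preheader), i_2 (latch)>;
     ...
     i_2 = i_1 + 1;

   the PHI result i_1 is recognized as a basic induction variable with
   base 0 and step 1, while an SSA name defined in the loop body as,
   say, j_4 = i_1 * 4 is a derived (general) iv found later by
   find_givs.  */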
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
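/* E.g. (hypothetical GIMPLE, for illustration only): with a biv i_3 of
   base 0 and step 1 and a loop-invariant pointer a_1, the assignment

     p_2 = a_1 + i_3 * 4;

   defines a general iv p_2 whose affine form has base a_1 and step 4,
   which simple_iv recovers from the scalar evolution of the left-hand
   side.  */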
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv, true);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type, tree addr_base = NULL,
            unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
                struct iv *iv, gimple stmt, enum use_type use_type,
                tree addr_base, unsigned HOST_WIDE_INT addr_offset,
                unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
        {
          pre = group;
          group = group->next;
        }
      while (group && addr_offset > group->addr_offset);

      use->next = pre->next;
      pre->next = use;
    }

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
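/* For example (illustrative only): for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   CONTROL_VAR is set to point at i_1, BOUND at n_2, and true is
   returned.  For a compare of two invariants, or of two ivs with
   nonzero step, false is returned instead.  */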
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
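/* E.g. (illustrative): for a reference a[i] where I is an iv with step 1
   and the elements of A are 4 bytes wide, the callback rewrites the
   index to the initial value of I and accumulates 4 * 1 = 4 into
   DTA->step, so the whole address is later treated as an iv stepping
   by 4 bytes each iteration.  */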
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
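/* A couple of illustrative cases: constant_multiple_of (n * 6, n, &m)
   sets M to 6 via the MULT_EXPR arm; constant_multiple_of
   (n * 6 - n * 2, n, &m) sets M to 4 via the MINUS_EXPR arm; and
   constant_multiple_of (7, 2, &m) fails because the truncating
   division leaves a nonzero remainder.  */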
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
                  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
        continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
          && operand_equal_p (iv->step, use->iv->step, 0)
          && operand_equal_p (addr_base, use->addr_base, 0))
        break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
                       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
                           use_type, addr_base, addr_offset, i);
}
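/* For instance (illustrative): with 4-byte elements, the accesses a[i]
   and a[i + 4] have the same stripped base and step and differ only in
   the constant offsets 0 and 16, so the second is recorded as a sub use
   of the first; the whole group can then be expressed off a single iv
   using [base] and [base + 16] addressing modes.  */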
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}
2172 /* Finds uses of the induction variables that are interesting. */
2174 static void
2175 find_interesting_uses (struct ivopts_data *data)
2177 basic_block bb;
2178 gimple_stmt_iterator bsi;
2179 basic_block *body = get_loop_body (data->current_loop);
2180 unsigned i;
2181 struct version_info *info;
2182 edge e;
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "Uses:\n\n");
2187 for (i = 0; i < data->current_loop->num_nodes; i++)
2189 edge_iterator ei;
2190 bb = body[i];
2192 FOR_EACH_EDGE (e, ei, bb->succs)
2193 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2194 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2195 find_interesting_uses_outside (data, e);
2197 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2198 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2200 if (!is_gimple_debug (gsi_stmt (bsi)))
2201 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2204 if (dump_file && (dump_flags & TDF_DETAILS))
2206 bitmap_iterator bi;
2208 fprintf (dump_file, "\n");
2210 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2212 info = ver_info (data, i);
2213 if (info->inv_id)
2215 fprintf (dump_file, " ");
2216 print_generic_expr (dump_file, info->name, TDF_SLIM);
2217 fprintf (dump_file, " is invariant (%d)%s\n",
2218 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2222 fprintf (dump_file, "\n");
2225 free (body);
2228 /* Compute maximum offset of [base + offset] addressing mode
2229 for memory reference represented by USE. */
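/* A hypothetical walk-through of the probing below (numbers assumed, not
   taken from any particular target): on a machine whose [base + offset]
   mode accepts a 12-bit signed offset, the loop asks for decreasing i
   whether "reg + ((1 << i) - 1)" is a valid address.  The first hit is

     i = 11:  off = (1 << 11) - 1 = 2047

   and 2047 is cached in max_offset_list for this (address space, mode)
   pair so later queries are answered without re-probing.  */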
2231 static HOST_WIDE_INT
2232 compute_max_addr_offset (struct iv_use *use)
2234 int width;
2235 rtx reg, addr;
2236 HOST_WIDE_INT i, off;
2237 unsigned list_index, num;
2238 addr_space_t as;
2239 machine_mode mem_mode, addr_mode;
2240 static vec<HOST_WIDE_INT> max_offset_list;
2242 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2243 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2245 num = max_offset_list.length ();
2246 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2247 if (list_index >= num)
2249 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2250 for (; num < max_offset_list.length (); num++)
2251 max_offset_list[num] = -1;
2254 off = max_offset_list[list_index];
2255 if (off != -1)
2256 return off;
2258 addr_mode = targetm.addr_space.address_mode (as);
2259 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2260 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2262 width = GET_MODE_BITSIZE (addr_mode) - 1;
2263 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2264 width = HOST_BITS_PER_WIDE_INT - 1;
2266 for (i = width; i > 0; i--)
2268 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2269 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2270 if (memory_address_addr_space_p (mem_mode, addr, as))
2271 break;
2273 /* For some strict-alignment targets, the offset must be naturally
2274 aligned. Try an aligned offset if mem_mode is not QImode. */
2275 off = ((unsigned HOST_WIDE_INT) 1 << i);
2276 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2278 off -= GET_MODE_SIZE (mem_mode);
2279 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2280 if (memory_address_addr_space_p (mem_mode, addr, as))
2281 break;
2284 if (i == 0)
2285 off = 0;
2287 max_offset_list[list_index] = off;
2288 return off;
2291 /* Check if all small groups should be split. Return true if and
2292 only if:
2294 1) At least one group contains two uses with different offsets.
2295 2) No group contains more than two uses with different offsets.
2297 Return false otherwise. We want to split such groups because:
2299 1) Small groups don't have much benefit and may interfere with
2300 general candidate selection.
2301 2) The problem size with only small groups is usually small, and
2302 the general algorithm can handle it well.
2304 TODO -- Above claim may not hold when auto increment is supported. */
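/* An illustrative sketch (uses invented): a group { *p, *(p + 4) } has
   two distinct offsets and votes for splitting, while a group
   { *p, *(p + 4), *(p + 8) } has three distinct offsets and makes the
   function return false, so all groups are kept intact.  */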
2306 static bool
2307 split_all_small_groups (struct ivopts_data *data)
2309 bool split_p = false;
2310 unsigned int i, n, distinct;
2311 struct iv_use *pre, *use;
2313 n = n_iv_uses (data);
2314 for (i = 0; i < n; i++)
2316 use = iv_use (data, i);
2317 if (!use->next)
2318 continue;
2320 distinct = 1;
2321 gcc_assert (use->type == USE_ADDRESS);
2322 for (pre = use, use = use->next; use; pre = use, use = use->next)
2324 if (pre->addr_offset != use->addr_offset)
2325 distinct++;
2327 if (distinct > 2)
2328 return false;
2330 if (distinct == 2)
2331 split_p = true;
2334 return split_p;
2337 /* For each group of address type uses, this function further groups
2338 these uses according to the maximum offset supported by the target's
2339 [base + offset] addressing mode. */
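/* A hypothetical example, with max_offset assumed to be 4095: for a
   group of uses at offsets

     p, p + 8, p + 8192

   the first two stay together, because 8 - 0 fits in the offset part,
   whereas p + 8192 is broken off into a new group that gets a fresh id
   and is appended to data->iv_uses.  */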
2341 static void
2342 group_address_uses (struct ivopts_data *data)
2344 HOST_WIDE_INT max_offset = -1;
2345 unsigned int i, n, sub_id;
2346 struct iv_use *pre, *use;
2347 unsigned HOST_WIDE_INT addr_offset_first;
2349 /* Reset max offset to split all small groups. */
2350 if (split_all_small_groups (data))
2351 max_offset = 0;
2353 n = n_iv_uses (data);
2354 for (i = 0; i < n; i++)
2356 use = iv_use (data, i);
2357 if (!use->next)
2358 continue;
2360 gcc_assert (use->type == USE_ADDRESS);
2361 if (max_offset != 0)
2362 max_offset = compute_max_addr_offset (use);
2364 while (use)
2366 sub_id = 0;
2367 addr_offset_first = use->addr_offset;
2368 /* Only uses whose offset relative to the first use fits in the
2369 offset part of the addressing mode can be grouped together. */
2370 for (pre = use, use = use->next;
2371 use && (use->addr_offset - addr_offset_first
2372 <= (unsigned HOST_WIDE_INT) max_offset);
2373 pre = use, use = use->next)
2375 use->id = pre->id;
2376 use->sub_id = ++sub_id;
2379 /* Break the list and create a new group. */
2380 if (use)
2382 pre->next = NULL;
2383 use->id = n_iv_uses (data);
2384 use->related_cands = BITMAP_ALLOC (NULL);
2385 data->iv_uses.safe_push (use);
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 dump_uses (dump_file, data);
2394 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2395 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2396 we are at the top-level of the processed address. */
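/* Hypothetical results of the recursion below, assuming char *p and an
   array a of 4-byte ints (examples invented for illustration):

     strip_offset_1 (p + 16, ...)     returns p,      *offset = 16
     strip_offset_1 (&a[i + 3], ...)  returns &a[i],  *offset = 12
     strip_offset_1 (i * 4 + 8, ...)  returns i * 4,  *offset = 8  */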
2398 static tree
2399 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2400 HOST_WIDE_INT *offset)
2402 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2403 enum tree_code code;
2404 tree type, orig_type = TREE_TYPE (expr);
2405 HOST_WIDE_INT off0, off1, st;
2406 tree orig_expr = expr;
2408 STRIP_NOPS (expr);
2410 type = TREE_TYPE (expr);
2411 code = TREE_CODE (expr);
2412 *offset = 0;
2414 switch (code)
2416 case INTEGER_CST:
2417 if (!cst_and_fits_in_hwi (expr)
2418 || integer_zerop (expr))
2419 return orig_expr;
2421 *offset = int_cst_value (expr);
2422 return build_int_cst (orig_type, 0);
2424 case POINTER_PLUS_EXPR:
2425 case PLUS_EXPR:
2426 case MINUS_EXPR:
2427 op0 = TREE_OPERAND (expr, 0);
2428 op1 = TREE_OPERAND (expr, 1);
2430 op0 = strip_offset_1 (op0, false, false, &off0);
2431 op1 = strip_offset_1 (op1, false, false, &off1);
2433 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2434 if (op0 == TREE_OPERAND (expr, 0)
2435 && op1 == TREE_OPERAND (expr, 1))
2436 return orig_expr;
2438 if (integer_zerop (op1))
2439 expr = op0;
2440 else if (integer_zerop (op0))
2442 if (code == MINUS_EXPR)
2443 expr = fold_build1 (NEGATE_EXPR, type, op1);
2444 else
2445 expr = op1;
2447 else
2448 expr = fold_build2 (code, type, op0, op1);
2450 return fold_convert (orig_type, expr);
2452 case MULT_EXPR:
2453 op1 = TREE_OPERAND (expr, 1);
2454 if (!cst_and_fits_in_hwi (op1))
2455 return orig_expr;
2457 op0 = TREE_OPERAND (expr, 0);
2458 op0 = strip_offset_1 (op0, false, false, &off0);
2459 if (op0 == TREE_OPERAND (expr, 0))
2460 return orig_expr;
2462 *offset = off0 * int_cst_value (op1);
2463 if (integer_zerop (op0))
2464 expr = op0;
2465 else
2466 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2468 return fold_convert (orig_type, expr);
2470 case ARRAY_REF:
2471 case ARRAY_RANGE_REF:
2472 if (!inside_addr)
2473 return orig_expr;
2475 step = array_ref_element_size (expr);
2476 if (!cst_and_fits_in_hwi (step))
2477 break;
2479 st = int_cst_value (step);
2480 op1 = TREE_OPERAND (expr, 1);
2481 op1 = strip_offset_1 (op1, false, false, &off1);
2482 *offset = off1 * st;
2484 if (top_compref
2485 && integer_zerop (op1))
2487 /* Strip the component reference completely. */
2488 op0 = TREE_OPERAND (expr, 0);
2489 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2490 *offset += off0;
2491 return op0;
2493 break;
2495 case COMPONENT_REF:
2497 tree field;
2499 if (!inside_addr)
2500 return orig_expr;
2502 tmp = component_ref_field_offset (expr);
2503 field = TREE_OPERAND (expr, 1);
2504 if (top_compref
2505 && cst_and_fits_in_hwi (tmp)
2506 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2508 HOST_WIDE_INT boffset, abs_off;
2510 /* Strip the component reference completely. */
2511 op0 = TREE_OPERAND (expr, 0);
2512 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2513 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2514 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2515 if (boffset < 0)
2516 abs_off = -abs_off;
2518 *offset = off0 + int_cst_value (tmp) + abs_off;
2519 return op0;
2522 break;
2524 case ADDR_EXPR:
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, true, true, &off0);
2527 *offset += off0;
2529 if (op0 == TREE_OPERAND (expr, 0))
2530 return orig_expr;
2532 expr = build_fold_addr_expr (op0);
2533 return fold_convert (orig_type, expr);
2535 case MEM_REF:
2536 /* ??? Offset operand? */
2537 inside_addr = false;
2538 break;
2540 default:
2541 return orig_expr;
2544 /* Default handling of expressions for which we want to recurse into
2545 the first operand. */
2546 op0 = TREE_OPERAND (expr, 0);
2547 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2548 *offset += off0;
2550 if (op0 == TREE_OPERAND (expr, 0)
2551 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2552 return orig_expr;
2554 expr = copy_node (expr);
2555 TREE_OPERAND (expr, 0) = op0;
2556 if (op1)
2557 TREE_OPERAND (expr, 1) = op1;
2559 /* Inside an address, we might strip top-level component references,
2560 thus changing the type of the expression. Handling of ADDR_EXPR
2561 will fix that. */
2562 expr = fold_convert (orig_type, expr);
2564 return expr;
2567 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2569 static tree
2570 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2572 HOST_WIDE_INT off;
2573 tree core = strip_offset_1 (expr, false, false, &off);
2574 *offset = off;
2575 return core;
2578 /* Returns a variant of TYPE that can be used as a base for different uses.
2579 We return an unsigned type with the same precision, which avoids problems
2580 with overflows. */
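/* For instance (illustrative only): generic_type_for (int) is
   unsigned int, generic_type_for (unsigned int) is unsigned int
   itself, and for a pointer type it is the unsigned integer type of
   the same precision, so that candidate arithmetic wraps instead of
   invoking undefined behavior on overflow.  */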
2582 static tree
2583 generic_type_for (tree type)
2585 if (POINTER_TYPE_P (type))
2586 return unsigned_type_for (type);
2588 if (TYPE_UNSIGNED (type))
2589 return type;
2591 return unsigned_type_for (type);
2594 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2595 the bitmap in which we should store them. */
2597 static struct ivopts_data *fd_ivopts_data;
2598 static tree
2599 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2601 bitmap *depends_on = (bitmap *) data;
2602 struct version_info *info;
2604 if (TREE_CODE (*expr_p) != SSA_NAME)
2605 return NULL_TREE;
2606 info = name_info (fd_ivopts_data, *expr_p);
2608 if (!info->inv_id || info->has_nonlin_use)
2609 return NULL_TREE;
2611 if (!*depends_on)
2612 *depends_on = BITMAP_ALLOC (NULL);
2613 bitmap_set_bit (*depends_on, info->inv_id);
2615 return NULL_TREE;
2618 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2619 position to POS. If USE is not NULL, the candidate is set as related to
2620 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2621 replacement of the final value of the iv by a direct computation. */
2623 static struct iv_cand *
2624 add_candidate_1 (struct ivopts_data *data,
2625 tree base, tree step, bool important, enum iv_position pos,
2626 struct iv_use *use, gimple incremented_at)
2628 unsigned i;
2629 struct iv_cand *cand = NULL;
2630 tree type, orig_type;
2632 /* For non-original variables, make sure their values are computed in a type
2633 that does not invoke undefined behavior on overflows (since in general,
2634 we cannot prove that these induction variables are non-wrapping). */
2635 if (pos != IP_ORIGINAL)
2637 orig_type = TREE_TYPE (base);
2638 type = generic_type_for (orig_type);
2639 if (type != orig_type)
2641 base = fold_convert (type, base);
2642 step = fold_convert (type, step);
2646 for (i = 0; i < n_iv_cands (data); i++)
2648 cand = iv_cand (data, i);
2650 if (cand->pos != pos)
2651 continue;
2653 if (cand->incremented_at != incremented_at
2654 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2655 && cand->ainc_use != use))
2656 continue;
2658 if (!cand->iv)
2660 if (!base && !step)
2661 break;
2663 continue;
2666 if (!base && !step)
2667 continue;
2669 if (operand_equal_p (base, cand->iv->base, 0)
2670 && operand_equal_p (step, cand->iv->step, 0)
2671 && (TYPE_PRECISION (TREE_TYPE (base))
2672 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2673 break;
2676 if (i == n_iv_cands (data))
2678 cand = XCNEW (struct iv_cand);
2679 cand->id = i;
2681 if (!base && !step)
2682 cand->iv = NULL;
2683 else
2684 cand->iv = alloc_iv (base, step);
2686 cand->pos = pos;
2687 if (pos != IP_ORIGINAL && cand->iv)
2689 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2690 cand->var_after = cand->var_before;
2692 cand->important = important;
2693 cand->incremented_at = incremented_at;
2694 data->iv_candidates.safe_push (cand);
2696 if (step
2697 && TREE_CODE (step) != INTEGER_CST)
2699 fd_ivopts_data = data;
2700 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2703 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2704 cand->ainc_use = use;
2705 else
2706 cand->ainc_use = NULL;
2708 if (dump_file && (dump_flags & TDF_DETAILS))
2709 dump_cand (dump_file, cand);
2712 if (important && !cand->important)
2714 cand->important = true;
2715 if (dump_file && (dump_flags & TDF_DETAILS))
2716 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2719 if (use)
2721 bitmap_set_bit (use->related_cands, i);
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 fprintf (dump_file, "Candidate %d is related to use %d\n",
2724 cand->id, use->id);
2727 return cand;
2730 /* Returns true if incrementing the induction variable at the end of the LOOP
2731 is allowed.
2733 The purpose is to avoid splitting the latch edge with a biv increment,
2734 thus creating a jump, possibly confusing other optimization passes and
2735 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2736 IP_NORMAL_POS is not available (so we do not have a better alternative),
2737 or if the latch edge is already nonempty. */
2739 static bool
2740 allow_ip_end_pos_p (struct loop *loop)
2742 if (!ip_normal_pos (loop))
2743 return true;
2745 if (!empty_block_p (ip_end_pos (loop)))
2746 return true;
2748 return false;
2751 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2752 Important field is set to IMPORTANT. */
2754 static void
2755 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2756 bool important, struct iv_use *use)
2758 basic_block use_bb = gimple_bb (use->stmt);
2759 machine_mode mem_mode;
2760 unsigned HOST_WIDE_INT cstepi;
2762 /* If we insert the increment in any position other than the standard
2763 ones, we must ensure that it is incremented once per iteration.
2764 It must not be in an inner nested loop, or on one side of an if
2765 statement. */
2766 if (use_bb->loop_father != data->current_loop
2767 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2768 || stmt_could_throw_p (use->stmt)
2769 || !cst_and_fits_in_hwi (step))
2770 return;
2772 cstepi = int_cst_value (step);
2774 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2775 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2776 || USE_STORE_PRE_INCREMENT (mem_mode))
2777 && GET_MODE_SIZE (mem_mode) == cstepi)
2778 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2779 || USE_STORE_PRE_DECREMENT (mem_mode))
2780 && GET_MODE_SIZE (mem_mode) == -cstepi))
2782 enum tree_code code = MINUS_EXPR;
2783 tree new_base;
2784 tree new_step = step;
2786 if (POINTER_TYPE_P (TREE_TYPE (base)))
2788 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2789 code = POINTER_PLUS_EXPR;
2791 else
2792 new_step = fold_convert (TREE_TYPE (base), new_step);
2793 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2794 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2795 use->stmt);
2797 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2798 || USE_STORE_POST_INCREMENT (mem_mode))
2799 && GET_MODE_SIZE (mem_mode) == cstepi)
2800 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2801 || USE_STORE_POST_DECREMENT (mem_mode))
2802 && GET_MODE_SIZE (mem_mode) == -cstepi))
2804 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2805 use->stmt);
2809 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2810 position to POS. If USE is not NULL, the candidate is set as related to
2811 it. The candidate computation is scheduled on all available positions. */
2813 static void
2814 add_candidate (struct ivopts_data *data,
2815 tree base, tree step, bool important, struct iv_use *use)
2817 gcc_assert (use == NULL || use->sub_id == 0);
2819 if (ip_normal_pos (data->current_loop))
2820 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2821 if (ip_end_pos (data->current_loop)
2822 && allow_ip_end_pos_p (data->current_loop))
2823 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2825 if (use != NULL && use->type == USE_ADDRESS)
2826 add_autoinc_candidates (data, base, step, important, use);
2829 /* Adds standard iv candidates. */
2831 static void
2832 add_standard_iv_candidates (struct ivopts_data *data)
2834 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2836 /* The same for the wider long type, if it is still fast enough. */
2837 if (TYPE_PRECISION
2838 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2839 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2840 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2841 build_int_cst (long_integer_type_node, 1), true, NULL);
2843 /* And likewise for long long, if it is wider still and fast enough. */
2844 if (TYPE_PRECISION
2845 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2846 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2847 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2848 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2852 /* Adds candidates based on the old induction variable IV. */
2854 static void
2855 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2857 gimple phi;
2858 tree def;
2859 struct iv_cand *cand;
2861 add_candidate (data, iv->base, iv->step, true, NULL);
2863 /* The same, but with initial value zero. */
2864 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2865 add_candidate (data, size_int (0), iv->step, true, NULL);
2866 else
2867 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2868 iv->step, true, NULL);
2870 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2871 if (gimple_code (phi) == GIMPLE_PHI)
2873 /* Additionally record the possibility of leaving the original iv
2874 untouched. */
2875 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2876 /* Don't add candidate if it's from another PHI node because
2877 it's an affine iv appearing in the form of PEELED_CHREC. */
2878 phi = SSA_NAME_DEF_STMT (def);
2879 if (gimple_code (phi) != GIMPLE_PHI)
2881 cand = add_candidate_1 (data,
2882 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2883 SSA_NAME_DEF_STMT (def));
2884 cand->var_before = iv->ssa_name;
2885 cand->var_after = def;
2887 else
2888 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2892 /* Adds candidates based on the old induction variables. */
2894 static void
2895 add_old_ivs_candidates (struct ivopts_data *data)
2897 unsigned i;
2898 struct iv *iv;
2899 bitmap_iterator bi;
2901 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2903 iv = ver_info (data, i)->iv;
2904 if (iv && iv->biv_p && !integer_zerop (iv->step))
2905 add_old_iv_candidates (data, iv);
2909 /* Adds candidates based on the value of the induction variable IV and USE. */
2911 static void
2912 add_iv_value_candidates (struct ivopts_data *data,
2913 struct iv *iv, struct iv_use *use)
2915 unsigned HOST_WIDE_INT offset;
2916 tree base;
2917 tree basetype;
2919 add_candidate (data, iv->base, iv->step, false, use);
2921 /* The same, but with initial value zero. Make such variable important,
2922 since it is generic enough so that possibly many uses may be based
2923 on it. */
2924 basetype = TREE_TYPE (iv->base);
2925 if (POINTER_TYPE_P (basetype))
2926 basetype = sizetype;
2927 add_candidate (data, build_int_cst (basetype, 0),
2928 iv->step, true, use);
2930 /* Third, try removing the constant offset. Make sure to even
2931 add a candidate for &a[0] vs. (T *)&a. */
2932 base = strip_offset (iv->base, &offset);
2933 if (offset
2934 || base != iv->base)
2935 add_candidate (data, base, iv->step, false, use);
2938 /* Adds candidates based on the uses. */
2940 static void
2941 add_derived_ivs_candidates (struct ivopts_data *data)
2943 unsigned i;
2945 for (i = 0; i < n_iv_uses (data); i++)
2947 struct iv_use *use = iv_use (data, i);
2949 if (!use)
2950 continue;
2952 switch (use->type)
2954 case USE_NONLINEAR_EXPR:
2955 case USE_COMPARE:
2956 case USE_ADDRESS:
2957 /* Just add the ivs based on the value of the iv used here. */
2958 add_iv_value_candidates (data, use->iv, use);
2959 break;
2961 default:
2962 gcc_unreachable ();
2967 /* Record important candidates and add them to related_cands bitmaps
2968 if needed. */
2970 static void
2971 record_important_candidates (struct ivopts_data *data)
2973 unsigned i;
2974 struct iv_use *use;
2976 for (i = 0; i < n_iv_cands (data); i++)
2978 struct iv_cand *cand = iv_cand (data, i);
2980 if (cand->important)
2981 bitmap_set_bit (data->important_candidates, i);
2984 data->consider_all_candidates = (n_iv_cands (data)
2985 <= CONSIDER_ALL_CANDIDATES_BOUND);
2987 if (data->consider_all_candidates)
2989 /* We will not need "related_cands" bitmaps in this case,
2990 so release them to decrease peak memory consumption. */
2991 for (i = 0; i < n_iv_uses (data); i++)
2993 use = iv_use (data, i);
2994 BITMAP_FREE (use->related_cands);
2997 else
2999 /* Add important candidates to the related_cands bitmaps. */
3000 for (i = 0; i < n_iv_uses (data); i++)
3001 bitmap_ior_into (iv_use (data, i)->related_cands,
3002 data->important_candidates);
3006 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3007 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3008 we allocate a simple list for every use. */
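/* A small worked example (numbers invented): a use related to five
   candidates gets n_map_members = 1 << ceil_log2 (5) = 8 slots, so
   set_use_iv_cost below can reduce a candidate id with the cheap mask
   "id & 7" instead of a division.  */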
3010 static void
3011 alloc_use_cost_map (struct ivopts_data *data)
3013 unsigned i, size, s;
3015 for (i = 0; i < n_iv_uses (data); i++)
3017 struct iv_use *use = iv_use (data, i);
3019 if (data->consider_all_candidates)
3020 size = n_iv_cands (data);
3021 else
3023 s = bitmap_count_bits (use->related_cands);
3025 /* Round up to a power of two, so that taking the modulus is fast. */
3026 size = s ? (1 << ceil_log2 (s)) : 1;
3029 use->n_map_members = size;
3030 use->cost_map = XCNEWVEC (struct cost_pair, size);
3034 /* Returns a cost description for a computation whose runtime cost
3035 is RUNTIME and whose complexity is COMPLEXITY. */
3037 static comp_cost
3038 new_cost (unsigned runtime, unsigned complexity)
3040 comp_cost cost;
3042 cost.cost = runtime;
3043 cost.complexity = complexity;
3045 return cost;
3048 /* Returns true if COST is infinite. */
3050 static bool
3051 infinite_cost_p (comp_cost cost)
3053 return cost.cost == INFTY;
3056 /* Adds costs COST1 and COST2. */
3058 static comp_cost
3059 add_costs (comp_cost cost1, comp_cost cost2)
3061 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3062 return infinite_cost;
3064 cost1.cost += cost2.cost;
3065 cost1.complexity += cost2.complexity;
3067 return cost1;
3069 /* Subtracts cost COST2 from COST1. */
3071 static comp_cost
3072 sub_costs (comp_cost cost1, comp_cost cost2)
3074 cost1.cost -= cost2.cost;
3075 cost1.complexity -= cost2.complexity;
3077 return cost1;
3080 /* Returns a negative number if COST1 < COST2, a positive number if
3081 COST1 > COST2, and 0 if COST1 = COST2. */
3083 static int
3084 compare_costs (comp_cost cost1, comp_cost cost2)
3086 if (cost1.cost == cost2.cost)
3087 return cost1.complexity - cost2.complexity;
3089 return cost1.cost - cost2.cost;
3092 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3093 depends on invariants DEPENDS_ON, that the value used in expressing it
3094 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
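/* The cost map behaves as a tiny open-addressed hash table: the
   candidate id is reduced by the power-of-two mask and probing walks
   forward, wrapping around to the start.  With the assumed values
   n_map_members == 8 and cand->id == 13, the search starts at slot
   13 & 7 == 5 and takes the first free slot from there.  */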
3096 static void
3097 set_use_iv_cost (struct ivopts_data *data,
3098 struct iv_use *use, struct iv_cand *cand,
3099 comp_cost cost, bitmap depends_on, tree value,
3100 enum tree_code comp, int inv_expr_id)
3102 unsigned i, s;
3104 if (infinite_cost_p (cost))
3106 BITMAP_FREE (depends_on);
3107 return;
3110 if (data->consider_all_candidates)
3112 use->cost_map[cand->id].cand = cand;
3113 use->cost_map[cand->id].cost = cost;
3114 use->cost_map[cand->id].depends_on = depends_on;
3115 use->cost_map[cand->id].value = value;
3116 use->cost_map[cand->id].comp = comp;
3117 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3118 return;
3121 /* n_map_members is a power of two, so this computes modulo. */
3122 s = cand->id & (use->n_map_members - 1);
3123 for (i = s; i < use->n_map_members; i++)
3124 if (!use->cost_map[i].cand)
3125 goto found;
3126 for (i = 0; i < s; i++)
3127 if (!use->cost_map[i].cand)
3128 goto found;
3130 gcc_unreachable ();
3132 found:
3133 use->cost_map[i].cand = cand;
3134 use->cost_map[i].cost = cost;
3135 use->cost_map[i].depends_on = depends_on;
3136 use->cost_map[i].value = value;
3137 use->cost_map[i].comp = comp;
3138 use->cost_map[i].inv_expr_id = inv_expr_id;
3141 /* Gets cost of (USE, CANDIDATE) pair. */
3143 static struct cost_pair *
3144 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3145 struct iv_cand *cand)
3147 unsigned i, s;
3148 struct cost_pair *ret;
3150 if (!cand)
3151 return NULL;
3153 if (data->consider_all_candidates)
3155 ret = use->cost_map + cand->id;
3156 if (!ret->cand)
3157 return NULL;
3159 return ret;
3162 /* n_map_members is a power of two, so this computes modulo. */
3163 s = cand->id & (use->n_map_members - 1);
3164 for (i = s; i < use->n_map_members; i++)
3165 if (use->cost_map[i].cand == cand)
3166 return use->cost_map + i;
3167 else if (use->cost_map[i].cand == NULL)
3168 return NULL;
3169 for (i = 0; i < s; i++)
3170 if (use->cost_map[i].cand == cand)
3171 return use->cost_map + i;
3172 else if (use->cost_map[i].cand == NULL)
3173 return NULL;
3175 return NULL;
3178 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3179 static rtx
3180 produce_memory_decl_rtl (tree obj, int *regno)
3182 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3183 machine_mode address_mode = targetm.addr_space.address_mode (as);
3184 rtx x;
3186 gcc_assert (obj);
3187 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3189 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3190 x = gen_rtx_SYMBOL_REF (address_mode, name);
3191 SET_SYMBOL_REF_DECL (x, obj);
3192 x = gen_rtx_MEM (DECL_MODE (obj), x);
3193 set_mem_addr_space (x, as);
3194 targetm.encode_section_info (obj, x, true);
3196 else
3198 x = gen_raw_REG (address_mode, (*regno)++);
3199 x = gen_rtx_MEM (DECL_MODE (obj), x);
3200 set_mem_addr_space (x, as);
3203 return x;
3206 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
3207 walk_tree. DATA contains the current fake register number. */
3209 static tree
3210 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3212 tree obj = NULL_TREE;
3213 rtx x = NULL_RTX;
3214 int *regno = (int *) data;
3216 switch (TREE_CODE (*expr_p))
3218 case ADDR_EXPR:
3219 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3220 handled_component_p (*expr_p);
3221 expr_p = &TREE_OPERAND (*expr_p, 0))
3222 continue;
3223 obj = *expr_p;
3224 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3225 x = produce_memory_decl_rtl (obj, regno);
3226 break;
3228 case SSA_NAME:
3229 *ws = 0;
3230 obj = SSA_NAME_VAR (*expr_p);
3231 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3232 if (!obj)
3233 return NULL_TREE;
3234 if (!DECL_RTL_SET_P (obj))
3235 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3236 break;
3238 case VAR_DECL:
3239 case PARM_DECL:
3240 case RESULT_DECL:
3241 *ws = 0;
3242 obj = *expr_p;
3244 if (DECL_RTL_SET_P (obj))
3245 break;
3247 if (DECL_MODE (obj) == BLKmode)
3248 x = produce_memory_decl_rtl (obj, regno);
3249 else
3250 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3252 break;
3254 default:
3255 break;
3258 if (x)
3260 decl_rtl_to_reset.safe_push (obj);
3261 SET_DECL_RTL (obj, x);
3264 return NULL_TREE;
3267 /* Determines cost of the computation of EXPR. */
3269 static unsigned
3270 computation_cost (tree expr, bool speed)
3272 rtx_insn *seq;
3273 rtx rslt;
3274 tree type = TREE_TYPE (expr);
3275 unsigned cost;
3276 /* Avoid using hard regs in ways which may be unsupported. */
3277 int regno = LAST_VIRTUAL_REGISTER + 1;
3278 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3279 enum node_frequency real_frequency = node->frequency;
3281 node->frequency = NODE_FREQUENCY_NORMAL;
3282 crtl->maybe_hot_insn_p = speed;
3283 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3284 start_sequence ();
3285 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3286 seq = get_insns ();
3287 end_sequence ();
3288 default_rtl_profile ();
3289 node->frequency = real_frequency;
3291 cost = seq_cost (seq, speed);
3292 if (MEM_P (rslt))
3293 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3294 TYPE_ADDR_SPACE (type), speed);
3295 else if (!REG_P (rslt))
3296 cost += set_src_cost (rslt, speed);
3298 return cost;
3301 /* Returns variable containing the value of candidate CAND at statement AT. */
3303 static tree
3304 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3306 if (stmt_after_increment (loop, cand, stmt))
3307 return cand->var_after;
3308 else
3309 return cand->var_before;
3312 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3313 same precision that is at least as wide as the precision of TYPE, stores
3314 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3315 type of A and B. */
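/* An assumed example: if *A is (int) al and *B is (int) bl, with al and
   bl both of type long, the function stores al and bl back through A
   and B and returns long, letting the caller fold the difference in
   the wider type before casting to the unsigned use type.  */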
3317 static tree
3318 determine_common_wider_type (tree *a, tree *b)
3320 tree wider_type = NULL;
3321 tree suba, subb;
3322 tree atype = TREE_TYPE (*a);
3324 if (CONVERT_EXPR_P (*a))
3326 suba = TREE_OPERAND (*a, 0);
3327 wider_type = TREE_TYPE (suba);
3328 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3329 return atype;
3331 else
3332 return atype;
3334 if (CONVERT_EXPR_P (*b))
3336 subb = TREE_OPERAND (*b, 0);
3337 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3338 return atype;
3340 else
3341 return atype;
3343 *a = suba;
3344 *b = subb;
3345 return wider_type;
3348 /* Determines the expression by which USE is expressed from induction variable
3349 CAND at statement AT in LOOP. The expression is stored in a decomposed
3350 form into AFF. Returns false if USE cannot be expressed using CAND. */
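/* With the ratio chosen so that ustep == ratio * cstep, the computation
   below amounts to (derivation for illustration):

     use = ubase + ratio * (var - cbase)
         = ubase - ratio * cbase + ratio * var

   e.g. a use with base 12 and step 4 against a candidate with base 0
   and step 1 (numbers invented) yields ratio 4 and 12 + 4 * var.  */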
3352 static bool
3353 get_computation_aff (struct loop *loop,
3354 struct iv_use *use, struct iv_cand *cand, gimple at,
3355 struct aff_tree *aff)
3357 tree ubase = use->iv->base;
3358 tree ustep = use->iv->step;
3359 tree cbase = cand->iv->base;
3360 tree cstep = cand->iv->step, cstep_common;
3361 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3362 tree common_type, var;
3363 tree uutype;
3364 aff_tree cbase_aff, var_aff;
3365 widest_int rat;
3367 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3369 /* We do not have enough precision to express the values of USE. */
3370 return false;
3373 var = var_at_stmt (loop, cand, at);
3374 uutype = unsigned_type_for (utype);
3376 /* If the conversion is not noop, perform it. */
3377 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3379 cstep = fold_convert (uutype, cstep);
3380 cbase = fold_convert (uutype, cbase);
3381 var = fold_convert (uutype, var);
3384 if (!constant_multiple_of (ustep, cstep, &rat))
3385 return false;
3387 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3388 type, we achieve better folding by computing their difference in this
3389 wider type, and cast the result to UUTYPE. We do not need to worry about
3390 overflows, as all the arithmetic will in the end be performed in UUTYPE
3391 anyway. */
3392 common_type = determine_common_wider_type (&ubase, &cbase);
3394 /* use = ubase - ratio * cbase + ratio * var. */
3395 tree_to_aff_combination (ubase, common_type, aff);
3396 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3397 tree_to_aff_combination (var, uutype, &var_aff);
3399 /* We need to shift the value if we are after the increment. */
3400 if (stmt_after_increment (loop, cand, at))
3402 aff_tree cstep_aff;
3404 if (common_type != uutype)
3405 cstep_common = fold_convert (common_type, cstep);
3406 else
3407 cstep_common = cstep;
3409 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3410 aff_combination_add (&cbase_aff, &cstep_aff);
3413 aff_combination_scale (&cbase_aff, -rat);
3414 aff_combination_add (aff, &cbase_aff);
3415 if (common_type != uutype)
3416 aff_combination_convert (aff, uutype);
3418 aff_combination_scale (&var_aff, rat);
3419 aff_combination_add (aff, &var_aff);
3421 return true;
3424 /* Return the type of USE. */
3426 static tree
3427 get_use_type (struct iv_use *use)
3429 tree base_type = TREE_TYPE (use->iv->base);
3430 tree type;
3432 if (use->type == USE_ADDRESS)
3434 /* The base_type may be a void pointer. Create a pointer type based on
3435 the mem_ref instead. */
3436 type = build_pointer_type (TREE_TYPE (*use->op_p));
3437 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3438 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3440 else
3441 type = base_type;
3443 return type;
3446 /* Determines the expression by which USE is expressed from induction variable
3447 CAND at statement AT in LOOP. The computation is unshared. */
3449 static tree
3450 get_computation_at (struct loop *loop,
3451 struct iv_use *use, struct iv_cand *cand, gimple at)
3453 aff_tree aff;
3454 tree type = get_use_type (use);
3456 if (!get_computation_aff (loop, use, cand, at, &aff))
3457 return NULL_TREE;
3458 unshare_aff_combination (&aff);
3459 return fold_convert (type, aff_combination_to_tree (&aff));
3462 /* Determines the expression by which USE is expressed from induction variable
3463 CAND in LOOP. The computation is unshared. */
3465 static tree
3466 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3468 return get_computation_at (loop, use, cand, use->stmt);
3471 /* Adjust the cost COST for being in loop setup rather than loop body.
3472 If we're optimizing for space, the loop setup overhead is constant;
3473 if we're optimizing for speed, amortize it over the average number of iterations. */
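/* E.g. (numbers purely illustrative): a setup cost of 8 in a loop
   averaging 4 iterations is charged as 8 / 4 == 2 when optimizing for
   speed, but remains 8 when optimizing for size.  */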
3474 static unsigned
3475 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3477 if (cost == INFTY)
3478 return cost;
3479 else if (optimize_loop_for_speed_p (data->current_loop))
3480 return cost / avg_loop_niter (data->current_loop);
3481 else
3482 return cost;
3485 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3486 validity for a memory reference accessing memory of mode MODE in
3487 address space AS. */
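/* On a target like x86, for instance, one would expect the probing
   below to accept the hardware index scales 1, 2, 4 and 8 and reject
   other ratios; the answers are memoized per (address space, mode)
   pair in valid_mult_list.  */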
3490 bool
3491 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3492 addr_space_t as)
3494 #define MAX_RATIO 128
3495 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3496 static vec<sbitmap> valid_mult_list;
3497 sbitmap valid_mult;
3499 if (data_index >= valid_mult_list.length ())
3500 valid_mult_list.safe_grow_cleared (data_index + 1);
3502 valid_mult = valid_mult_list[data_index];
3503 if (!valid_mult)
3505 machine_mode address_mode = targetm.addr_space.address_mode (as);
3506 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3507 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3508 rtx addr, scaled;
3509 HOST_WIDE_INT i;
3511 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3512 bitmap_clear (valid_mult);
3513 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3514 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3515 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3517 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3518 if (memory_address_addr_space_p (mode, addr, as)
3519 || memory_address_addr_space_p (mode, scaled, as))
3520 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3525 fprintf (dump_file, " allowed multipliers:");
3526 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3527 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3528 fprintf (dump_file, " %d", (int) i);
3529 fprintf (dump_file, "\n");
3530 fprintf (dump_file, "\n");
3533 valid_mult_list[data_index] = valid_mult;
3536 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3537 return false;
3539 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3542 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3543 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3544 variable is omitted. Compute the cost for a memory reference that accesses
3545 a memory location of mode MEM_MODE in address space AS.
3547 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3548 size of MEM_MODE / RATIO) is available. To make this determination, we
3549 look at the size of the increment to be made, which is given in CSTEP.
3550 CSTEP may be zero if the step is unknown.
3551 STMT_AFTER_INC is true iff the statement we're looking at is after the
3552 increment of the original biv.
3554 TODO -- there must be some better way. This all is quite crude. */
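/* Illustration of the table encoding used below (assumed, not
   normative): the sixteen {symbol, var, offset, ratio} combinations
   are enumerated by the bits of i, so i == 13 (binary 1101) stands
   for the shape "sym + cst + rat * index" with no separate var
   term.  */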
3556 enum ainc_type
3558 AINC_PRE_INC, /* Pre increment. */
3559 AINC_PRE_DEC, /* Pre decrement. */
3560 AINC_POST_INC, /* Post increment. */
3561 AINC_POST_DEC, /* Post decrement. */
3562 AINC_NONE /* Also the number of auto increment types. */
3565 typedef struct address_cost_data_s
3567 HOST_WIDE_INT min_offset, max_offset;
3568 unsigned costs[2][2][2][2];
3569 unsigned ainc_costs[AINC_NONE];
3570 } *address_cost_data;
3573 static comp_cost
3574 get_address_cost (bool symbol_present, bool var_present,
3575 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3576 HOST_WIDE_INT cstep, machine_mode mem_mode,
3577 addr_space_t as, bool speed,
3578 bool stmt_after_inc, bool *may_autoinc)
3580 machine_mode address_mode = targetm.addr_space.address_mode (as);
3581 static vec<address_cost_data> address_cost_data_list;
3582 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3583 address_cost_data data;
3584 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3585 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3586 unsigned cost, acost, complexity;
3587 enum ainc_type autoinc_type;
3588 bool offset_p, ratio_p, autoinc;
3589 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3590 unsigned HOST_WIDE_INT mask;
3591 unsigned bits;
3593 if (data_index >= address_cost_data_list.length ())
3594 address_cost_data_list.safe_grow_cleared (data_index + 1);
3596 data = address_cost_data_list[data_index];
3597 if (!data)
3599 HOST_WIDE_INT i;
3600 HOST_WIDE_INT rat, off = 0;
3601 int old_cse_not_expected, width;
3602 unsigned sym_p, var_p, off_p, rat_p, add_c;
3603 rtx_insn *seq;
3604 rtx addr, base;
3605 rtx reg0, reg1;
3607 data = (address_cost_data) xcalloc (1, sizeof (*data));
3609 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3611 width = GET_MODE_BITSIZE (address_mode) - 1;
3612 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3613 width = HOST_BITS_PER_WIDE_INT - 1;
3614 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3616 for (i = width; i >= 0; i--)
3618 off = -((unsigned HOST_WIDE_INT) 1 << i);
3619 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3620 if (memory_address_addr_space_p (mem_mode, addr, as))
3621 break;
3623 data->min_offset = (i == -1? 0 : off);
3625 for (i = width; i >= 0; i--)
3627 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3628 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3629 if (memory_address_addr_space_p (mem_mode, addr, as))
3630 break;
3631 /* For some strict-alignment targets, the offset must be naturally
3632 aligned. Try an aligned offset if mem_mode is not QImode. */
3633 off = mem_mode != QImode
3634 ? ((unsigned HOST_WIDE_INT) 1 << i)
3635 - GET_MODE_SIZE (mem_mode)
3636 : 0;
3637 if (off > 0)
3639 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3640 if (memory_address_addr_space_p (mem_mode, addr, as))
3641 break;
3644 if (i == -1)
3645 off = 0;
3646 data->max_offset = off;
3648 if (dump_file && (dump_flags & TDF_DETAILS))
3650 fprintf (dump_file, "get_address_cost:\n");
3651 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3652 GET_MODE_NAME (mem_mode),
3653 data->min_offset);
3654 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3655 GET_MODE_NAME (mem_mode),
3656 data->max_offset);
3659 rat = 1;
3660 for (i = 2; i <= MAX_RATIO; i++)
3661 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3663 rat = i;
3664 break;
3667 /* Compute the cost of various addressing modes. */
3668 acost = 0;
3669 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3670 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3672 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3673 || USE_STORE_PRE_DECREMENT (mem_mode))
3675 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3676 has_predec[mem_mode]
3677 = memory_address_addr_space_p (mem_mode, addr, as);
3679 if (has_predec[mem_mode])
3680 data->ainc_costs[AINC_PRE_DEC]
3681 = address_cost (addr, mem_mode, as, speed);
3683 if (USE_LOAD_POST_DECREMENT (mem_mode)
3684 || USE_STORE_POST_DECREMENT (mem_mode))
3686 addr = gen_rtx_POST_DEC (address_mode, reg0);
3687 has_postdec[mem_mode]
3688 = memory_address_addr_space_p (mem_mode, addr, as);
3690 if (has_postdec[mem_mode])
3691 data->ainc_costs[AINC_POST_DEC]
3692 = address_cost (addr, mem_mode, as, speed);
3694 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3695 || USE_STORE_PRE_INCREMENT (mem_mode))
3697 addr = gen_rtx_PRE_INC (address_mode, reg0);
3698 has_preinc[mem_mode]
3699 = memory_address_addr_space_p (mem_mode, addr, as);
3701 if (has_preinc[mem_mode])
3702 data->ainc_costs[AINC_PRE_INC]
3703 = address_cost (addr, mem_mode, as, speed);
3705 if (USE_LOAD_POST_INCREMENT (mem_mode)
3706 || USE_STORE_POST_INCREMENT (mem_mode))
3708 addr = gen_rtx_POST_INC (address_mode, reg0);
3709 has_postinc[mem_mode]
3710 = memory_address_addr_space_p (mem_mode, addr, as);
3712 if (has_postinc[mem_mode])
3713 data->ainc_costs[AINC_POST_INC]
3714 = address_cost (addr, mem_mode, as, speed);
3716 for (i = 0; i < 16; i++)
3718 sym_p = i & 1;
3719 var_p = (i >> 1) & 1;
3720 off_p = (i >> 2) & 1;
3721 rat_p = (i >> 3) & 1;
3723 addr = reg0;
3724 if (rat_p)
3725 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3726 gen_int_mode (rat, address_mode));
3728 if (var_p)
3729 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3731 if (sym_p)
3733 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3734 /* ??? We can run into trouble with some backends by presenting
3735 them with symbols which haven't been properly passed through
3736 targetm.encode_section_info. By setting the local bit, we
3737 enhance the probability of things working. */
3738 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3740 if (off_p)
3741 base = gen_rtx_fmt_e (CONST, address_mode,
3742 gen_rtx_fmt_ee
3743 (PLUS, address_mode, base,
3744 gen_int_mode (off, address_mode)));
3746 else if (off_p)
3747 base = gen_int_mode (off, address_mode);
3748 else
3749 base = NULL_RTX;
3751 if (base)
3752 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3754 start_sequence ();
3755 /* To avoid splitting addressing modes, pretend that no cse will
3756 follow. */
3757 old_cse_not_expected = cse_not_expected;
3758 cse_not_expected = true;
3759 addr = memory_address_addr_space (mem_mode, addr, as);
3760 cse_not_expected = old_cse_not_expected;
3761 seq = get_insns ();
3762 end_sequence ();
3764 acost = seq_cost (seq, speed);
3765 acost += address_cost (addr, mem_mode, as, speed);
3767 if (!acost)
3768 acost = 1;
3769 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3772 /* On some targets, it is quite expensive to load a symbol into a register,
3773 which makes addresses that contain symbols look much more expensive.
3774 However, the symbol will have to be loaded in any case before the
3775 loop (and quite likely we have it in a register already), so it does not
3776 make much sense to penalize them too heavily. So make some final
3777 tweaks for the SYMBOL_PRESENT modes:
3779 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3780 var is cheaper, use this mode with small penalty.
3781 If VAR_PRESENT is true, try whether the mode with
3782 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3783 if this is the case, use it. */
3784 add_c = add_cost (speed, address_mode);
3785 for (i = 0; i < 8; i++)
3787 var_p = i & 1;
3788 off_p = (i >> 1) & 1;
3789 rat_p = (i >> 2) & 1;
3791 acost = data->costs[0][1][off_p][rat_p] + 1;
3792 if (var_p)
3793 acost += add_c;
3795 if (acost < data->costs[1][var_p][off_p][rat_p])
3796 data->costs[1][var_p][off_p][rat_p] = acost;
3799 if (dump_file && (dump_flags & TDF_DETAILS))
3801 fprintf (dump_file, "Address costs:\n");
3803 for (i = 0; i < 16; i++)
3805 sym_p = i & 1;
3806 var_p = (i >> 1) & 1;
3807 off_p = (i >> 2) & 1;
3808 rat_p = (i >> 3) & 1;
3810 fprintf (dump_file, " ");
3811 if (sym_p)
3812 fprintf (dump_file, "sym + ");
3813 if (var_p)
3814 fprintf (dump_file, "var + ");
3815 if (off_p)
3816 fprintf (dump_file, "cst + ");
3817 if (rat_p)
3818 fprintf (dump_file, "rat * ");
3820 acost = data->costs[sym_p][var_p][off_p][rat_p];
3821 fprintf (dump_file, "index costs %d\n", acost);
3823 if (has_predec[mem_mode] || has_postdec[mem_mode]
3824 || has_preinc[mem_mode] || has_postinc[mem_mode])
3825 fprintf (dump_file, " May include autoinc/dec\n");
3826 fprintf (dump_file, "\n");
3829 address_cost_data_list[data_index] = data;
3832 bits = GET_MODE_BITSIZE (address_mode);
3833 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3834 offset &= mask;
3835 if ((offset >> (bits - 1) & 1))
3836 offset |= ~mask;
3837 s_offset = offset;
3839 autoinc = false;
3840 autoinc_type = AINC_NONE;
3841 msize = GET_MODE_SIZE (mem_mode);
3842 autoinc_offset = offset;
3843 if (stmt_after_inc)
3844 autoinc_offset += ratio * cstep;
3845 if (symbol_present || var_present || ratio != 1)
3846 autoinc = false;
3847 else
3849 if (has_postinc[mem_mode] && autoinc_offset == 0
3850 && msize == cstep)
3851 autoinc_type = AINC_POST_INC;
3852 else if (has_postdec[mem_mode] && autoinc_offset == 0
3853 && msize == -cstep)
3854 autoinc_type = AINC_POST_DEC;
3855 else if (has_preinc[mem_mode] && autoinc_offset == msize
3856 && msize == cstep)
3857 autoinc_type = AINC_PRE_INC;
3858 else if (has_predec[mem_mode] && autoinc_offset == -msize
3859 && msize == -cstep)
3860 autoinc_type = AINC_PRE_DEC;
3862 if (autoinc_type != AINC_NONE)
3863 autoinc = true;
3866 cost = 0;
3867 offset_p = (s_offset != 0
3868 && data->min_offset <= s_offset
3869 && s_offset <= data->max_offset);
3870 ratio_p = (ratio != 1
3871 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3873 if (ratio != 1 && !ratio_p)
3874 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3876 if (s_offset && !offset_p && !symbol_present)
3877 cost += add_cost (speed, address_mode);
3879 if (may_autoinc)
3880 *may_autoinc = autoinc;
3881 if (autoinc)
3882 acost = data->ainc_costs[autoinc_type];
3883 else
3884 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3885 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3886 return new_cost (cost + acost, complexity);
3889 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3890 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3891 calculating the operands of EXPR. Returns true if successful, and returns
3892 the cost in COST. */
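/* A hedged example of the pattern handled below: for expr = a + b * 8
   the multiplication is a shift with m == 3, so the cost is
   MIN (add + shift, shiftadd); for expr = a - b * 8 the shiftsub
   variants are consulted instead.  */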
3894 static bool
3895 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3896 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3898 comp_cost res;
3899 tree op1 = TREE_OPERAND (expr, 1);
3900 tree cst = TREE_OPERAND (mult, 1);
3901 tree multop = TREE_OPERAND (mult, 0);
3902 int m = exact_log2 (int_cst_value (cst));
3903 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3904 int as_cost, sa_cost;
3905 bool mult_in_op1;
3907 if (!(m >= 0 && m < maxm))
3908 return false;
3910 mult_in_op1 = operand_equal_p (op1, mult, 0);
3912 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3914 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3915 use that in preference to a shift insn followed by an add insn. */
3916 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3917 ? shiftadd_cost (speed, mode, m)
3918 : (mult_in_op1
3919 ? shiftsub1_cost (speed, mode, m)
3920 : shiftsub0_cost (speed, mode, m)));
3922 res = new_cost (MIN (as_cost, sa_cost), 0);
3923 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3925 STRIP_NOPS (multop);
3926 if (!is_gimple_val (multop))
3927 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3929 *cost = res;
3930 return true;
3933 /* Estimates cost of forcing expression EXPR into a variable. */
3935 static comp_cost
3936 force_expr_to_var_cost (tree expr, bool speed)
3938 static bool costs_initialized = false;
3939 static unsigned integer_cost [2];
3940 static unsigned symbol_cost [2];
3941 static unsigned address_cost [2];
3942 tree op0, op1;
3943 comp_cost cost0, cost1, cost;
3944 machine_mode mode;
3946 if (!costs_initialized)
3948 tree type = build_pointer_type (integer_type_node);
3949 tree var, addr;
3950 rtx x;
3951 int i;
3953 var = create_tmp_var_raw (integer_type_node, "test_var");
3954 TREE_STATIC (var) = 1;
3955 x = produce_memory_decl_rtl (var, NULL);
3956 SET_DECL_RTL (var, x);
3958 addr = build1 (ADDR_EXPR, type, var);
3961 for (i = 0; i < 2; i++)
3963 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3964 2000), i);
3966 symbol_cost[i] = computation_cost (addr, i) + 1;
3968 address_cost[i]
3969 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3970 if (dump_file && (dump_flags & TDF_DETAILS))
3972 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3973 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3974 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3975 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3976 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3977 fprintf (dump_file, "\n");
3981 costs_initialized = true;
3984 STRIP_NOPS (expr);
3986 if (SSA_VAR_P (expr))
3987 return no_cost;
3989 if (is_gimple_min_invariant (expr))
3991 if (TREE_CODE (expr) == INTEGER_CST)
3992 return new_cost (integer_cost [speed], 0);
3994 if (TREE_CODE (expr) == ADDR_EXPR)
3996 tree obj = TREE_OPERAND (expr, 0);
3998 if (TREE_CODE (obj) == VAR_DECL
3999 || TREE_CODE (obj) == PARM_DECL
4000 || TREE_CODE (obj) == RESULT_DECL)
4001 return new_cost (symbol_cost [speed], 0);
4004 return new_cost (address_cost [speed], 0);
4007 switch (TREE_CODE (expr))
4009 case POINTER_PLUS_EXPR:
4010 case PLUS_EXPR:
4011 case MINUS_EXPR:
4012 case MULT_EXPR:
4013 op0 = TREE_OPERAND (expr, 0);
4014 op1 = TREE_OPERAND (expr, 1);
4015 STRIP_NOPS (op0);
4016 STRIP_NOPS (op1);
4017 break;
4019 CASE_CONVERT:
4020 case NEGATE_EXPR:
4021 op0 = TREE_OPERAND (expr, 0);
4022 STRIP_NOPS (op0);
4023 op1 = NULL_TREE;
4024 break;
4026 default:
4027 /* Just an arbitrary value, FIXME. */
4028 return new_cost (target_spill_cost[speed], 0);
4031 if (op0 == NULL_TREE
4032 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4033 cost0 = no_cost;
4034 else
4035 cost0 = force_expr_to_var_cost (op0, speed);
4037 if (op1 == NULL_TREE
4038 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4039 cost1 = no_cost;
4040 else
4041 cost1 = force_expr_to_var_cost (op1, speed);
4043 mode = TYPE_MODE (TREE_TYPE (expr));
4044 switch (TREE_CODE (expr))
4046 case POINTER_PLUS_EXPR:
4047 case PLUS_EXPR:
4048 case MINUS_EXPR:
4049 case NEGATE_EXPR:
4050 cost = new_cost (add_cost (speed, mode), 0);
4051 if (TREE_CODE (expr) != NEGATE_EXPR)
4053 tree mult = NULL_TREE;
4054 comp_cost sa_cost;
4055 if (TREE_CODE (op1) == MULT_EXPR)
4056 mult = op1;
4057 else if (TREE_CODE (op0) == MULT_EXPR)
4058 mult = op0;
4060 if (mult != NULL_TREE
4061 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4062 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4063 speed, &sa_cost))
4064 return sa_cost;
4066 break;
4068 CASE_CONVERT:
4070 tree inner_mode, outer_mode;
4071 outer_mode = TREE_TYPE (expr);
4072 inner_mode = TREE_TYPE (op0);
4073 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4074 TYPE_MODE (inner_mode), speed), 0);
4076 break;
4078 case MULT_EXPR:
4079 if (cst_and_fits_in_hwi (op0))
4080 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4081 mode, speed), 0);
4082 else if (cst_and_fits_in_hwi (op1))
4083 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4084 mode, speed), 0);
4085 else
4086 return new_cost (target_spill_cost [speed], 0);
4087 break;
4089 default:
4090 gcc_unreachable ();
4093 cost = add_costs (cost, cost0);
4094 cost = add_costs (cost, cost1);
4096 /* Bound the cost by target_spill_cost. The parts of complicated
4097 computations often are either loop invariant or at least can
4098 be shared between several iv uses, so letting this grow without
4099 limits would not give reasonable results. */
4100 if (cost.cost > (int) target_spill_cost [speed])
4101 cost.cost = target_spill_cost [speed];
4103 return cost;
4106 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4107 invariants the computation depends on. */
4109 static comp_cost
4110 force_var_cost (struct ivopts_data *data,
4111 tree expr, bitmap *depends_on)
4113 if (depends_on)
4115 fd_ivopts_data = data;
4116 walk_tree (&expr, find_depends, depends_on, NULL);
4119 return force_expr_to_var_cost (expr, data->speed);
4122 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4123 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4124 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4125 invariants the computation depends on. */
4127 static comp_cost
4128 split_address_cost (struct ivopts_data *data,
4129 tree addr, bool *symbol_present, bool *var_present,
4130 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4132 tree core;
4133 HOST_WIDE_INT bitsize;
4134 HOST_WIDE_INT bitpos;
4135 tree toffset;
4136 machine_mode mode;
4137 int unsignedp, volatilep;
4139 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4140 &unsignedp, &volatilep, false);
4142 if (toffset != 0
4143 || bitpos % BITS_PER_UNIT != 0
4144 || TREE_CODE (core) != VAR_DECL)
4146 *symbol_present = false;
4147 *var_present = true;
4148 fd_ivopts_data = data;
4149 walk_tree (&addr, find_depends, depends_on, NULL);
4150 return new_cost (target_spill_cost[data->speed], 0);
4153 *offset += bitpos / BITS_PER_UNIT;
4154 if (TREE_STATIC (core)
4155 || DECL_EXTERNAL (core))
4157 *symbol_present = true;
4158 *var_present = false;
4159 return no_cost;
4162 *symbol_present = false;
4163 *var_present = true;
4164 return no_cost;
4167 /* Estimates cost of expressing difference of addresses E1 - E2 as
4168 var + symbol + offset. The value of offset is added to OFFSET,
4169 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4170 part is missing. DEPENDS_ON is a set of the invariants the computation
4171 depends on. */
4173 static comp_cost
4174 ptr_difference_cost (struct ivopts_data *data,
4175 tree e1, tree e2, bool *symbol_present, bool *var_present,
4176 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4178 HOST_WIDE_INT diff = 0;
4179 aff_tree aff_e1, aff_e2;
4180 tree type;
4182 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4184 if (ptr_difference_const (e1, e2, &diff))
4186 *offset += diff;
4187 *symbol_present = false;
4188 *var_present = false;
4189 return no_cost;
4192 if (integer_zerop (e2))
4193 return split_address_cost (data, TREE_OPERAND (e1, 0),
4194 symbol_present, var_present, offset, depends_on);
4196 *symbol_present = false;
4197 *var_present = true;
4199 type = signed_type_for (TREE_TYPE (e1));
4200 tree_to_aff_combination (e1, type, &aff_e1);
4201 tree_to_aff_combination (e2, type, &aff_e2);
4202 aff_combination_scale (&aff_e2, -1);
4203 aff_combination_add (&aff_e1, &aff_e2);
4205 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
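/* Annotation (illustrative, not part of the upstream source): for
   e1 == &a[4] and e2 == &a[0] with 4-byte elements,
   ptr_difference_const succeeds and the whole difference folds into
   *offset += 16 at no runtime cost; only a non-constant difference
   reaches the affine subtraction path above.  */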
4208 /* Estimates cost of expressing difference E1 - E2 as
4209 var + symbol + offset. The value of offset is added to OFFSET;
4210 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4211 part is missing. DEPENDS_ON is a set of the invariants the computation
4212 depends on. */
4214 static comp_cost
4215 difference_cost (struct ivopts_data *data,
4216 tree e1, tree e2, bool *symbol_present, bool *var_present,
4217 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4219 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4220 unsigned HOST_WIDE_INT off1, off2;
4221 aff_tree aff_e1, aff_e2;
4222 tree type;
4224 e1 = strip_offset (e1, &off1);
4225 e2 = strip_offset (e2, &off2);
4226 *offset += off1 - off2;
4228 STRIP_NOPS (e1);
4229 STRIP_NOPS (e2);
4231 if (TREE_CODE (e1) == ADDR_EXPR)
4232 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4233 offset, depends_on);
4234 *symbol_present = false;
4236 if (operand_equal_p (e1, e2, 0))
4238 *var_present = false;
4239 return no_cost;
4242 *var_present = true;
4244 if (integer_zerop (e2))
4245 return force_var_cost (data, e1, depends_on);
4247 if (integer_zerop (e1))
4249 comp_cost cost = force_var_cost (data, e2, depends_on);
4250 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4251 return cost;
4254 type = signed_type_for (TREE_TYPE (e1));
4255 tree_to_aff_combination (e1, type, &aff_e1);
4256 tree_to_aff_combination (e2, type, &aff_e2);
4257 aff_combination_scale (&aff_e2, -1);
4258 aff_combination_add (&aff_e1, &aff_e2);
4260 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4263 /* Returns true if AFF1 and AFF2 are identical. */
4265 static bool
4266 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4268 unsigned i;
4270 if (aff1->n != aff2->n)
4271 return false;
4273 for (i = 0; i < aff1->n; i++)
4275 if (aff1->elts[i].coef != aff2->elts[i].coef)
4276 return false;
4278 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4279 return false;
4281 return true;
4284 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4286 static int
4287 get_expr_id (struct ivopts_data *data, tree expr)
4289 struct iv_inv_expr_ent ent;
4290 struct iv_inv_expr_ent **slot;
4292 ent.expr = expr;
4293 ent.hash = iterative_hash_expr (expr, 0);
4294 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4295 if (*slot)
4296 return (*slot)->id;
4298 *slot = XNEW (struct iv_inv_expr_ent);
4299 (*slot)->expr = expr;
4300 (*slot)->hash = ent.hash;
4301 (*slot)->id = data->inv_expr_id++;
4302 return (*slot)->id;
4305 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4306 requires a new compiler-generated temporary. Returns -1 otherwise.
4307 ADDRESS_P is a flag indicating if the expression is for address
4308 computation. */
4310 static int
4311 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4312 tree cbase, HOST_WIDE_INT ratio,
4313 bool address_p)
4315 aff_tree ubase_aff, cbase_aff;
4316 tree expr, ub, cb;
4318 STRIP_NOPS (ubase);
4319 STRIP_NOPS (cbase);
4320 ub = ubase;
4321 cb = cbase;
4323 if ((TREE_CODE (ubase) == INTEGER_CST)
4324 && (TREE_CODE (cbase) == INTEGER_CST))
4325 return -1;
4327 /* Strips the constant part. */
4328 if (TREE_CODE (ubase) == PLUS_EXPR
4329 || TREE_CODE (ubase) == MINUS_EXPR
4330 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4332 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4333 ubase = TREE_OPERAND (ubase, 0);
4336 /* Strips the constant part. */
4337 if (TREE_CODE (cbase) == PLUS_EXPR
4338 || TREE_CODE (cbase) == MINUS_EXPR
4339 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4341 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4342 cbase = TREE_OPERAND (cbase, 0);
4345 if (address_p)
4347 if (((TREE_CODE (ubase) == SSA_NAME)
4348 || (TREE_CODE (ubase) == ADDR_EXPR
4349 && is_gimple_min_invariant (ubase)))
4350 && (TREE_CODE (cbase) == INTEGER_CST))
4351 return -1;
4353 if (((TREE_CODE (cbase) == SSA_NAME)
4354 || (TREE_CODE (cbase) == ADDR_EXPR
4355 && is_gimple_min_invariant (cbase)))
4356 && (TREE_CODE (ubase) == INTEGER_CST))
4357 return -1;
4360 if (ratio == 1)
4362 if (operand_equal_p (ubase, cbase, 0))
4363 return -1;
4365 if (TREE_CODE (ubase) == ADDR_EXPR
4366 && TREE_CODE (cbase) == ADDR_EXPR)
4368 tree usym, csym;
4370 usym = TREE_OPERAND (ubase, 0);
4371 csym = TREE_OPERAND (cbase, 0);
4372 if (TREE_CODE (usym) == ARRAY_REF)
4374 tree ind = TREE_OPERAND (usym, 1);
4375 if (TREE_CODE (ind) == INTEGER_CST
4376 && tree_fits_shwi_p (ind)
4377 && tree_to_shwi (ind) == 0)
4378 usym = TREE_OPERAND (usym, 0);
4380 if (TREE_CODE (csym) == ARRAY_REF)
4382 tree ind = TREE_OPERAND (csym, 1);
4383 if (TREE_CODE (ind) == INTEGER_CST
4384 && tree_fits_shwi_p (ind)
4385 && tree_to_shwi (ind) == 0)
4386 csym = TREE_OPERAND (csym, 0);
4388 if (operand_equal_p (usym, csym, 0))
4389 return -1;
4391 /* Now do a more complex comparison. */
4392 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4393 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4394 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4395 return -1;
4398 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4399 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4401 aff_combination_scale (&cbase_aff, -1 * ratio);
4402 aff_combination_add (&ubase_aff, &cbase_aff);
4403 expr = aff_combination_to_tree (&ubase_aff);
4404 return get_expr_id (data, expr);
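/* Annotation (illustrative, not part of the upstream source): with
   ratio == 1, ubase == x + 16 and cbase == x + 8, the INTEGER_CST
   parts are stripped above and the remaining bases compare equal, so
   no temporary is needed and -1 is returned.  By contrast, for
   ubase == x + y and cbase == z a temporary holding x + y - ratio * z
   is required, so its pseudo expr id is returned.  */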
4409 /* Determines the cost of the computation by which USE is expressed
4410 from induction variable CAND. If ADDRESS_P is true, we just need
4411 to create an address from it, otherwise we want to get it into a
4412 register. A set of invariants we depend on is stored in
4413 DEPENDS_ON. AT is the statement at which the value is computed.
4414 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4415 addressing is likely. */
4417 static comp_cost
4418 get_computation_cost_at (struct ivopts_data *data,
4419 struct iv_use *use, struct iv_cand *cand,
4420 bool address_p, bitmap *depends_on, gimple at,
4421 bool *can_autoinc,
4422 int *inv_expr_id)
4424 tree ubase = use->iv->base, ustep = use->iv->step;
4425 tree cbase, cstep;
4426 tree utype = TREE_TYPE (ubase), ctype;
4427 unsigned HOST_WIDE_INT cstepi, offset = 0;
4428 HOST_WIDE_INT ratio, aratio;
4429 bool var_present, symbol_present, stmt_is_after_inc;
4430 comp_cost cost;
4431 widest_int rat;
4432 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4433 machine_mode mem_mode = (address_p
4434 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4435 : VOIDmode);
4437 *depends_on = NULL;
4439 /* Only consider real candidates. */
4440 if (!cand->iv)
4441 return infinite_cost;
4443 cbase = cand->iv->base;
4444 cstep = cand->iv->step;
4445 ctype = TREE_TYPE (cbase);
4447 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4449 /* We do not have enough precision to express the values of the use. */
4450 return infinite_cost;
4453 if (address_p
4454 || (use->iv->base_object
4455 && cand->iv->base_object
4456 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4457 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4459 /* Do not try to express the address of an object with a computation
4460 based on the address of a different object. This may cause problems
4461 in rtl level alias analysis (which does not expect this to happen,
4462 as it is illegal in C), and would be unlikely to be useful
4463 anyway. */
4464 if (use->iv->base_object
4465 && cand->iv->base_object
4466 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4467 return infinite_cost;
4470 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4472 /* TODO -- add direct handling of this case. */
4473 goto fallback;
4476 /* CSTEPI is removed from the offset in case the statement is after the
4477 increment. If the step is not constant, we use zero instead.
4478 This is a bit imprecise (there is the extra addition), but
4479 redundancy elimination is likely to transform the code so that
4480 it uses the value of the variable before the increment anyway,
4481 so this is not too unrealistic. */
4482 if (cst_and_fits_in_hwi (cstep))
4483 cstepi = int_cst_value (cstep);
4484 else
4485 cstepi = 0;
4487 if (!constant_multiple_of (ustep, cstep, &rat))
4488 return infinite_cost;
4490 if (wi::fits_shwi_p (rat))
4491 ratio = rat.to_shwi ();
4492 else
4493 return infinite_cost;
4495 STRIP_NOPS (cbase);
4496 ctype = TREE_TYPE (cbase);
4498 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4500 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4501 or ratio == 1, it is better to handle this like
4503 ubase - ratio * cbase + ratio * var
4505 (also holds in the case ratio == -1, TODO). */
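/* Annotation (worked example, not part of the upstream source): with
   cbase == 4 and ratio == 2, the use ubase + 2 * (var - 4) is costed
   as (ubase - 8) + 2 * var, i.e. the constant -ratio * cbase is
   folded into OFFSET in the first branch below.  */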
4507 if (cst_and_fits_in_hwi (cbase))
4509 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4510 cost = difference_cost (data,
4511 ubase, build_int_cst (utype, 0),
4512 &symbol_present, &var_present, &offset,
4513 depends_on);
4514 cost.cost /= avg_loop_niter (data->current_loop);
4516 else if (ratio == 1)
4518 tree real_cbase = cbase;
4520 /* Check to see if any adjustment is needed. */
4521 if (cstepi == 0 && stmt_is_after_inc)
4523 aff_tree real_cbase_aff;
4524 aff_tree cstep_aff;
4526 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4527 &real_cbase_aff);
4528 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4530 aff_combination_add (&real_cbase_aff, &cstep_aff);
4531 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4534 cost = difference_cost (data,
4535 ubase, real_cbase,
4536 &symbol_present, &var_present, &offset,
4537 depends_on);
4538 cost.cost /= avg_loop_niter (data->current_loop);
4540 else if (address_p
4541 && !POINTER_TYPE_P (ctype)
4542 && multiplier_allowed_in_address_p
4543 (ratio, mem_mode,
4544 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4546 cbase
4547 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4548 cost = difference_cost (data,
4549 ubase, cbase,
4550 &symbol_present, &var_present, &offset,
4551 depends_on);
4552 cost.cost /= avg_loop_niter (data->current_loop);
4554 else
4556 cost = force_var_cost (data, cbase, depends_on);
4557 cost = add_costs (cost,
4558 difference_cost (data,
4559 ubase, build_int_cst (utype, 0),
4560 &symbol_present, &var_present,
4561 &offset, depends_on));
4562 cost.cost /= avg_loop_niter (data->current_loop);
4563 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4566 /* The set of invariants depended on by the sub-use has already been
4567 computed for the first use in the group. */
4568 if (use->sub_id)
4570 cost.cost = 0;
4571 if (depends_on && *depends_on)
4572 bitmap_clear (*depends_on);
4574 else if (inv_expr_id)
4576 *inv_expr_id =
4577 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4578 /* Clear depends_on. */
4579 if (*inv_expr_id != -1 && depends_on && *depends_on)
4580 bitmap_clear (*depends_on);
4583 /* If we are after the increment, the value of the candidate is higher by
4584 one iteration. */
4585 if (stmt_is_after_inc)
4586 offset -= ratio * cstepi;
4588 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4589 (symbol/var1/const parts may be omitted). If we are looking for an
4590 address, find the cost of addressing this. */
4591 if (address_p)
4592 return add_costs (cost,
4593 get_address_cost (symbol_present, var_present,
4594 offset, ratio, cstepi,
4595 mem_mode,
4596 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4597 speed, stmt_is_after_inc,
4598 can_autoinc));
4600 /* Otherwise estimate the costs for computing the expression. */
4601 if (!symbol_present && !var_present && !offset)
4603 if (ratio != 1)
4604 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4605 return cost;
4608 /* Symbol + offset should be compile-time computable, so consider that
4609 they are added once to the variable, if present. */
4610 if (var_present && (symbol_present || offset))
4611 cost.cost += adjust_setup_cost (data,
4612 add_cost (speed, TYPE_MODE (ctype)));
4614 /* Having an offset does not affect the runtime cost if it is added to
4615 the symbol, but it increases complexity. */
4616 if (offset)
4617 cost.complexity++;
4619 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4621 aratio = ratio > 0 ? ratio : -ratio;
4622 if (aratio != 1)
4623 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4624 return cost;
4626 fallback:
4627 if (can_autoinc)
4628 *can_autoinc = false;
4631 /* Just get the expression, expand it and measure the cost. */
4632 tree comp = get_computation_at (data->current_loop, use, cand, at);
4634 if (!comp)
4635 return infinite_cost;
4637 if (address_p)
4638 comp = build_simple_mem_ref (comp);
4640 return new_cost (computation_cost (comp, speed), 0);
4644 /* Determines the cost of the computation by which USE is expressed
4645 from induction variable CAND. If ADDRESS_P is true, we just need
4646 to create an address from it, otherwise we want to get it into a
4647 register. A set of invariants we depend on is stored in
4648 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4649 autoinc addressing is likely. */
4651 static comp_cost
4652 get_computation_cost (struct ivopts_data *data,
4653 struct iv_use *use, struct iv_cand *cand,
4654 bool address_p, bitmap *depends_on,
4655 bool *can_autoinc, int *inv_expr_id)
4657 return get_computation_cost_at (data,
4658 use, cand, address_p, depends_on, use->stmt,
4659 can_autoinc, inv_expr_id);
4662 /* Determines cost of basing replacement of USE on CAND in a generic
4663 expression. */
4665 static bool
4666 determine_use_iv_cost_generic (struct ivopts_data *data,
4667 struct iv_use *use, struct iv_cand *cand)
4669 bitmap depends_on;
4670 comp_cost cost;
4671 int inv_expr_id = -1;
4673 /* The simple case first -- if we need to express the value of the preserved
4674 original biv, the cost is 0. This also prevents us from counting the
4675 cost of increment twice -- once at this use and once in the cost of
4676 the candidate. */
4677 if (cand->pos == IP_ORIGINAL
4678 && cand->incremented_at == use->stmt)
4680 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4681 ERROR_MARK, -1);
4682 return true;
4685 cost = get_computation_cost (data, use, cand, false, &depends_on,
4686 NULL, &inv_expr_id);
4688 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4689 inv_expr_id);
4691 return !infinite_cost_p (cost);
4694 /* Determines cost of basing replacement of USE on CAND in an address. */
4696 static bool
4697 determine_use_iv_cost_address (struct ivopts_data *data,
4698 struct iv_use *use, struct iv_cand *cand)
4700 bitmap depends_on;
4701 bool can_autoinc;
4702 int inv_expr_id = -1;
4703 struct iv_use *sub_use;
4704 comp_cost sub_cost;
4705 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4706 &can_autoinc, &inv_expr_id);
4708 if (cand->ainc_use == use)
4710 if (can_autoinc)
4711 cost.cost -= cand->cost_step;
4712 /* If we generated the candidate solely for exploiting autoincrement
4713 opportunities, and it turns out it can't be used, set the cost to
4714 infinity to make sure we ignore it. */
4715 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4716 cost = infinite_cost;
4718 for (sub_use = use->next;
4719 sub_use && !infinite_cost_p (cost);
4720 sub_use = sub_use->next)
4722 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4723 &can_autoinc, &inv_expr_id);
4724 cost = add_costs (cost, sub_cost);
4727 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4728 inv_expr_id);
4730 return !infinite_cost_p (cost);
4733 /* Computes the value of candidate CAND at position AT in iteration NITER, and
4734 stores it to VAL. */
4736 static void
4737 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4738 aff_tree *val)
4740 aff_tree step, delta, nit;
4741 struct iv *iv = cand->iv;
4742 tree type = TREE_TYPE (iv->base);
4743 tree steptype;
4744 if (POINTER_TYPE_P (type))
4745 steptype = sizetype;
4746 else
4747 steptype = unsigned_type_for (type);
4748 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4749 aff_combination_convert (&step, steptype);
4750 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4751 aff_combination_convert (&nit, steptype);
4752 aff_combination_mult (&nit, &step, &delta);
4753 if (stmt_after_increment (loop, cand, at))
4754 aff_combination_add (&delta, &step);
4756 tree_to_aff_combination (iv->base, type, val);
4757 if (!POINTER_TYPE_P (type))
4758 aff_combination_convert (val, steptype);
4759 aff_combination_add (val, &delta);
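/* Annotation (not part of the upstream source): in effect this
   computes, in the unsigned step type,
     VAL = base + NITER * step          (position before the increment)
     VAL = base + (NITER + 1) * step    (position after the increment)
   with the multiplication done on affine combinations, so no
   intermediate trees need to be built and folded.  */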
4762 /* Returns the period of induction variable IV. */
4764 static tree
4765 iv_period (struct iv *iv)
4767 tree step = iv->step, period, type;
4768 tree pow2div;
4770 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4772 type = unsigned_type_for (TREE_TYPE (step));
4773 /* Period of the iv is lcm (step, type_range)/step - 1,
4774 i.e., N*type_range/step - 1. Since the type range is a power
4775 of two, N == step >> num_of_ending_zeros_binary (step),
4776 so the final result is
4778 (type_range >> num_of_ending_zeros_binary (step)) - 1
4781 pow2div = num_ending_zeros (step);
4783 period = build_low_bits_mask (type,
4784 (TYPE_PRECISION (type)
4785 - tree_to_uhwi (pow2div)));
4787 return period;
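/* Annotation (worked example, not part of the upstream source): for a
   32-bit unsigned iv with step 12 == 0b1100, num_ending_zeros (step)
   is 2, so period == (1 << (32 - 2)) - 1 == 0x3fffffff: the iv can be
   incremented that many times before wrapping back to its initial
   value.  */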
4790 /* Returns the comparison operator used when eliminating the iv USE. */
4792 static enum tree_code
4793 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4795 struct loop *loop = data->current_loop;
4796 basic_block ex_bb;
4797 edge exit;
4799 ex_bb = gimple_bb (use->stmt);
4800 exit = EDGE_SUCC (ex_bb, 0);
4801 if (flow_bb_inside_loop_p (loop, exit->dest))
4802 exit = EDGE_SUCC (ex_bb, 1);
4804 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4807 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4808 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4809 calculation is performed in a non-wrapping type.
4811 TODO: More generally, we could test for the situation that
4812 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4813 This would require knowing the sign of OFFSET. */
4815 static bool
4816 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4818 enum tree_code code;
4819 tree e1, e2;
4820 aff_tree aff_e1, aff_e2, aff_offset;
4822 if (!nowrap_type_p (TREE_TYPE (base)))
4823 return false;
4825 base = expand_simple_operations (base);
4827 if (TREE_CODE (base) == SSA_NAME)
4829 gimple stmt = SSA_NAME_DEF_STMT (base);
4831 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4832 return false;
4834 code = gimple_assign_rhs_code (stmt);
4835 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4836 return false;
4838 e1 = gimple_assign_rhs1 (stmt);
4839 e2 = gimple_assign_rhs2 (stmt);
4841 else
4843 code = TREE_CODE (base);
4844 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4845 return false;
4846 e1 = TREE_OPERAND (base, 0);
4847 e2 = TREE_OPERAND (base, 1);
4850 /* Use affine expansion as deeper inspection to prove the equality. */
4851 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4852 &aff_e2, &data->name_expansion_cache);
4853 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4854 &aff_offset, &data->name_expansion_cache);
4855 aff_combination_scale (&aff_offset, -1);
4856 switch (code)
4858 case PLUS_EXPR:
4859 aff_combination_add (&aff_e2, &aff_offset);
4860 if (aff_combination_zero_p (&aff_e2))
4861 return true;
4863 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4864 &aff_e1, &data->name_expansion_cache);
4865 aff_combination_add (&aff_e1, &aff_offset);
4866 return aff_combination_zero_p (&aff_e1);
4868 case POINTER_PLUS_EXPR:
4869 aff_combination_add (&aff_e2, &aff_offset);
4870 return aff_combination_zero_p (&aff_e2);
4872 default:
4873 return false;
4877 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4878 comparison with CAND. NITER describes the number of iterations of
4879 the loops. If successful, the comparison in COMP_P is altered accordingly.
4881 We aim to handle the following situation:
4883 sometype *base, *p;
4884 int a, b, i;
4886 i = a;
4887 p = p_0 = base + a;
4891 bla (*p);
4892 p++;
4893 i++;
4895 while (i < b);
4897 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4898 We aim to optimize this to
4900 p = p_0 = base + a;
4903 bla (*p);
4904 p++;
4906 while (p < p_0 - a + b);
4908 This preserves correctness, since the pointer arithmetic does not
4909 overflow. More precisely:
4911 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4912 overflow in computing it or the values of p.
4913 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4914 overflow. To prove this, we use the fact that p_0 = base + a. */
4916 static bool
4917 iv_elimination_compare_lt (struct ivopts_data *data,
4918 struct iv_cand *cand, enum tree_code *comp_p,
4919 struct tree_niter_desc *niter)
4921 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4922 struct aff_tree nit, tmpa, tmpb;
4923 enum tree_code comp;
4924 HOST_WIDE_INT step;
4926 /* We need to know that the candidate induction variable does not overflow.
4927 While more complex analysis may be used to prove this, for now just
4928 check that the variable appears in the original program and that it
4929 is computed in a type that guarantees no overflows. */
4930 cand_type = TREE_TYPE (cand->iv->base);
4931 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4932 return false;
4934 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4935 the calculation of the BOUND could overflow, making the comparison
4936 invalid. */
4937 if (!data->loop_single_exit_p)
4938 return false;
4940 /* We need to be able to decide whether candidate is increasing or decreasing
4941 in order to choose the right comparison operator. */
4942 if (!cst_and_fits_in_hwi (cand->iv->step))
4943 return false;
4944 step = int_cst_value (cand->iv->step);
4946 /* Check that the number of iterations matches the expected pattern:
4947 a + 1 > b ? 0 : b - a - 1. */
4948 mbz = niter->may_be_zero;
4949 if (TREE_CODE (mbz) == GT_EXPR)
4951 /* Handle a + 1 > b. */
4952 tree op0 = TREE_OPERAND (mbz, 0);
4953 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4955 a = TREE_OPERAND (op0, 0);
4956 b = TREE_OPERAND (mbz, 1);
4958 else
4959 return false;
4961 else if (TREE_CODE (mbz) == LT_EXPR)
4963 tree op1 = TREE_OPERAND (mbz, 1);
4965 /* Handle b < a + 1. */
4966 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4968 a = TREE_OPERAND (op1, 0);
4969 b = TREE_OPERAND (mbz, 0);
4971 else
4972 return false;
4974 else
4975 return false;
4977 /* Expected number of iterations is B - A - 1. Check that it matches
4978 the actual number, i.e., that B - A - NITER = 1. */
4979 tree_to_aff_combination (niter->niter, nit_type, &nit);
4980 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4981 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4982 aff_combination_scale (&nit, -1);
4983 aff_combination_scale (&tmpa, -1);
4984 aff_combination_add (&tmpb, &tmpa);
4985 aff_combination_add (&tmpb, &nit);
4986 if (tmpb.n != 0 || tmpb.offset != 1)
4987 return false;
4989 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4990 overflow. */
4991 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4992 cand->iv->step,
4993 fold_convert (TREE_TYPE (cand->iv->step), a));
4994 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4995 return false;
4997 /* Determine the new comparison operator. */
4998 comp = step < 0 ? GT_EXPR : LT_EXPR;
4999 if (*comp_p == NE_EXPR)
5000 *comp_p = comp;
5001 else if (*comp_p == EQ_EXPR)
5002 *comp_p = invert_tree_comparison (comp, false);
5003 else
5004 gcc_unreachable ();
5006 return true;
5009 /* Check whether it is possible to express the condition in USE by comparison
5010 of candidate CAND. If so, store the value compared with to BOUND, and the
5011 comparison operator to COMP. */
5013 static bool
5014 may_eliminate_iv (struct ivopts_data *data,
5015 struct iv_use *use, struct iv_cand *cand, tree *bound,
5016 enum tree_code *comp)
5018 basic_block ex_bb;
5019 edge exit;
5020 tree period;
5021 struct loop *loop = data->current_loop;
5022 aff_tree bnd;
5023 struct tree_niter_desc *desc = NULL;
5025 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5026 return false;
5028 /* For now this works only for exits that dominate the loop latch.
5029 TODO: extend to other conditions inside loop body. */
5030 ex_bb = gimple_bb (use->stmt);
5031 if (use->stmt != last_stmt (ex_bb)
5032 || gimple_code (use->stmt) != GIMPLE_COND
5033 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5034 return false;
5036 exit = EDGE_SUCC (ex_bb, 0);
5037 if (flow_bb_inside_loop_p (loop, exit->dest))
5038 exit = EDGE_SUCC (ex_bb, 1);
5039 if (flow_bb_inside_loop_p (loop, exit->dest))
5040 return false;
5042 desc = niter_for_exit (data, exit);
5043 if (!desc)
5044 return false;
5046 /* Determine whether we can use the variable to test the exit condition.
5047 This is the case iff the period of the induction variable is greater
5048 than the number of iterations for which the exit condition is true. */
5049 period = iv_period (cand->iv);
5051 /* If the number of iterations is constant, compare against it directly. */
5052 if (TREE_CODE (desc->niter) == INTEGER_CST)
5054 /* See cand_value_at. */
5055 if (stmt_after_increment (loop, cand, use->stmt))
5057 if (!tree_int_cst_lt (desc->niter, period))
5058 return false;
5060 else
5062 if (tree_int_cst_lt (period, desc->niter))
5063 return false;
5067 /* If not, and if this is the only possible exit of the loop, see whether
5068 we can get a conservative estimate on the number of iterations of the
5069 entire loop and compare against that instead. */
5070 else
5072 widest_int period_value, max_niter;
5074 max_niter = desc->max;
5075 if (stmt_after_increment (loop, cand, use->stmt))
5076 max_niter += 1;
5077 period_value = wi::to_widest (period);
5078 if (wi::gtu_p (max_niter, period_value))
5080 /* See if we can take advantage of inferred loop bound information. */
5081 if (data->loop_single_exit_p)
5083 if (!max_loop_iterations (loop, &max_niter))
5084 return false;
5085 /* The loop bound is already adjusted by adding 1. */
5086 if (wi::gtu_p (max_niter, period_value))
5087 return false;
5089 else
5090 return false;
5094 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5096 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5097 aff_combination_to_tree (&bnd));
5098 *comp = iv_elimination_compare (data, use);
5100 /* It is unlikely that computing the number of iterations using division
5101 would be more profitable than keeping the original induction variable. */
5102 if (expression_expensive_p (*bound))
5103 return false;
5105 /* Sometimes, it is possible to handle the situation that the number of
5106 iterations may be zero unless additional assumptions hold, by using <
5107 instead of != in the exit condition.
5109 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5110 base the exit condition on it. However, that is often too
5111 expensive. */
5112 if (!integer_zerop (desc->may_be_zero))
5113 return iv_elimination_compare_lt (data, cand, comp, desc);
5115 return true;
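/* Annotation (illustrative, not part of the upstream source): for

     for (i = 0; i < n; i++)
       *p++ = 0;

   with candidate p, DESC->niter is derived from n and cand_value_at
   yields the final value of p, so the exit test on i can be replaced
   by a comparison of p against that bound -- switching from != to <
   via iv_elimination_compare_lt when the may_be_zero case requires
   it.  */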
5118 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5119 be copied if it is used in the loop body and DATA->body_includes_call. */
5121 static int
5122 parm_decl_cost (struct ivopts_data *data, tree bound)
5124 tree sbound = bound;
5125 STRIP_NOPS (sbound);
5127 if (TREE_CODE (sbound) == SSA_NAME
5128 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5129 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5130 && data->body_includes_call)
5131 return COSTS_N_INSNS (1);
5133 return 0;
5136 /* Determines cost of basing replacement of USE on CAND in a condition. */
5138 static bool
5139 determine_use_iv_cost_condition (struct ivopts_data *data,
5140 struct iv_use *use, struct iv_cand *cand)
5142 tree bound = NULL_TREE;
5143 struct iv *cmp_iv;
5144 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5145 comp_cost elim_cost, express_cost, cost, bound_cost;
5146 bool ok;
5147 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5148 tree *control_var, *bound_cst;
5149 enum tree_code comp = ERROR_MARK;
5151 /* Only consider real candidates. */
5152 if (!cand->iv)
5154 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5155 ERROR_MARK, -1);
5156 return false;
5159 /* Try iv elimination. */
5160 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5162 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5163 if (elim_cost.cost == 0)
5164 elim_cost.cost = parm_decl_cost (data, bound);
5165 else if (TREE_CODE (bound) == INTEGER_CST)
5166 elim_cost.cost = 0;
5167 /* If we replace a loop condition 'i < n' with 'p < base + n',
5168 depends_on_elim will have 'base' and 'n' set, which implies
5169 that both 'base' and 'n' will be live during the loop. More likely,
5170 'base + n' will be loop invariant, resulting in only one live value
5171 during the loop. So in that case we clear depends_on_elim and set
5172 elim_inv_expr_id instead. */
5173 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5175 elim_inv_expr_id = get_expr_id (data, bound);
5176 bitmap_clear (depends_on_elim);
5178 /* The bound is a loop invariant, so it will be only computed
5179 once. */
5180 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5182 else
5183 elim_cost = infinite_cost;
5185 /* Try expressing the original giv. If it is compared with an invariant,
5186 note that we cannot get rid of it. */
5187 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5188 NULL, &cmp_iv);
5189 gcc_assert (ok);
5191 /* When the condition is a comparison of the candidate IV against
5192 zero, prefer this IV.
5194 TODO: The constant that we're subtracting from the cost should
5195 be target-dependent. This information should be added to the
5196 target costs for each backend. */
5197 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5198 && integer_zerop (*bound_cst)
5199 && (operand_equal_p (*control_var, cand->var_after, 0)
5200 || operand_equal_p (*control_var, cand->var_before, 0)))
5201 elim_cost.cost -= 1;
5203 express_cost = get_computation_cost (data, use, cand, false,
5204 &depends_on_express, NULL,
5205 &express_inv_expr_id);
5206 fd_ivopts_data = data;
5207 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5209 /* Count the cost of the original bound as well. */
5210 bound_cost = force_var_cost (data, *bound_cst, NULL);
5211 if (bound_cost.cost == 0)
5212 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5213 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5214 bound_cost.cost = 0;
5215 express_cost.cost += bound_cost.cost;
5217 /* Choose the better approach, preferring the eliminated IV. */
5218 if (compare_costs (elim_cost, express_cost) <= 0)
5220 cost = elim_cost;
5221 depends_on = depends_on_elim;
5222 depends_on_elim = NULL;
5223 inv_expr_id = elim_inv_expr_id;
5225 else
5227 cost = express_cost;
5228 depends_on = depends_on_express;
5229 depends_on_express = NULL;
5230 bound = NULL_TREE;
5231 comp = ERROR_MARK;
5232 inv_expr_id = express_inv_expr_id;
5235 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5237 if (depends_on_elim)
5238 BITMAP_FREE (depends_on_elim);
5239 if (depends_on_express)
5240 BITMAP_FREE (depends_on_express);
5242 return !infinite_cost_p (cost);
5245 /* Determines cost of basing replacement of USE on CAND. Returns false
5246 if USE cannot be based on CAND. */
5248 static bool
5249 determine_use_iv_cost (struct ivopts_data *data,
5250 struct iv_use *use, struct iv_cand *cand)
5252 switch (use->type)
5254 case USE_NONLINEAR_EXPR:
5255 return determine_use_iv_cost_generic (data, use, cand);
5257 case USE_ADDRESS:
5258 return determine_use_iv_cost_address (data, use, cand);
5260 case USE_COMPARE:
5261 return determine_use_iv_cost_condition (data, use, cand);
5263 default:
5264 gcc_unreachable ();
5268 /* Return true if get_computation_cost indicates that autoincrement is
5269 a possibility for the pair of USE and CAND, false otherwise. */
5271 static bool
5272 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5273 struct iv_cand *cand)
5275 bitmap depends_on;
5276 bool can_autoinc;
5277 comp_cost cost;
5279 if (use->type != USE_ADDRESS)
5280 return false;
5282 cost = get_computation_cost (data, use, cand, true, &depends_on,
5283 &can_autoinc, NULL);
5285 BITMAP_FREE (depends_on);
5287 return !infinite_cost_p (cost) && can_autoinc;
5290 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5291 use that allows autoincrement, and set their AINC_USE if possible. */
5293 static void
5294 set_autoinc_for_original_candidates (struct ivopts_data *data)
5296 unsigned i, j;
5298 for (i = 0; i < n_iv_cands (data); i++)
5300 struct iv_cand *cand = iv_cand (data, i);
5301 struct iv_use *closest_before = NULL;
5302 struct iv_use *closest_after = NULL;
5303 if (cand->pos != IP_ORIGINAL)
5304 continue;
5306 for (j = 0; j < n_iv_uses (data); j++)
5308 struct iv_use *use = iv_use (data, j);
5309 unsigned uid = gimple_uid (use->stmt);
5311 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5312 continue;
5314 if (uid < gimple_uid (cand->incremented_at)
5315 && (closest_before == NULL
5316 || uid > gimple_uid (closest_before->stmt)))
5317 closest_before = use;
5319 if (uid > gimple_uid (cand->incremented_at)
5320 && (closest_after == NULL
5321 || uid < gimple_uid (closest_after->stmt)))
5322 closest_after = use;
5325 if (closest_before != NULL
5326 && autoinc_possible_for_pair (data, closest_before, cand))
5327 cand->ainc_use = closest_before;
5328 else if (closest_after != NULL
5329 && autoinc_possible_for_pair (data, closest_after, cand))
5330 cand->ainc_use = closest_after;
5334 /* Finds the candidates for the induction variables. */
5336 static void
5337 find_iv_candidates (struct ivopts_data *data)
5339 /* Add commonly used ivs. */
5340 add_standard_iv_candidates (data);
5342 /* Add old induction variables. */
5343 add_old_ivs_candidates (data);
5345 /* Add induction variables derived from uses. */
5346 add_derived_ivs_candidates (data);
5348 set_autoinc_for_original_candidates (data);
5350 /* Record the important candidates. */
5351 record_important_candidates (data);
5354 /* Determines costs of basing the use of the iv on an iv candidate. */
5356 static void
5357 determine_use_iv_costs (struct ivopts_data *data)
5359 unsigned i, j;
5360 struct iv_use *use;
5361 struct iv_cand *cand;
5362 bitmap to_clear = BITMAP_ALLOC (NULL);
5364 alloc_use_cost_map (data);
5366 for (i = 0; i < n_iv_uses (data); i++)
5368 use = iv_use (data, i);
5370 if (data->consider_all_candidates)
5372 for (j = 0; j < n_iv_cands (data); j++)
5374 cand = iv_cand (data, j);
5375 determine_use_iv_cost (data, use, cand);
5378 else
5380 bitmap_iterator bi;
5382 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5384 cand = iv_cand (data, j);
5385 if (!determine_use_iv_cost (data, use, cand))
5386 bitmap_set_bit (to_clear, j);
5389 /* Remove the candidates for which the cost is infinite from
5390 the list of related candidates. */
5391 bitmap_and_compl_into (use->related_cands, to_clear);
5392 bitmap_clear (to_clear);
5396 BITMAP_FREE (to_clear);
5398 if (dump_file && (dump_flags & TDF_DETAILS))
5400 fprintf (dump_file, "Use-candidate costs:\n");
5402 for (i = 0; i < n_iv_uses (data); i++)
5404 use = iv_use (data, i);
5406 fprintf (dump_file, "Use %d:\n", i);
5407 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5408 for (j = 0; j < use->n_map_members; j++)
5410 if (!use->cost_map[j].cand
5411 || infinite_cost_p (use->cost_map[j].cost))
5412 continue;
5414 fprintf (dump_file, " %d\t%d\t%d\t",
5415 use->cost_map[j].cand->id,
5416 use->cost_map[j].cost.cost,
5417 use->cost_map[j].cost.complexity);
5418 if (use->cost_map[j].depends_on)
5419 bitmap_print (dump_file,
5420 use->cost_map[j].depends_on, "","");
5421 if (use->cost_map[j].inv_expr_id != -1)
5422 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5423 fprintf (dump_file, "\n");
5426 fprintf (dump_file, "\n");
5428 fprintf (dump_file, "\n");
5432 /* Determines cost of the candidate CAND. */
5434 static void
5435 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5437 comp_cost cost_base;
5438 unsigned cost, cost_step;
5439 tree base;
5441 if (!cand->iv)
5443 cand->cost = 0;
5444 return;
5447 /* There are two costs associated with the candidate -- its increment
5448 and its initialization. The second is almost negligible for any loop
5449 that rolls enough, so we give it only a small weight. */
5451 base = cand->iv->base;
5452 cost_base = force_var_cost (data, base, NULL);
5453 /* It would be exceptional for the iv register to happen to be initialized
5454 with the proper value at no cost. In general, there will at least be
5455 a regcopy or a const set. */
5456 if (cost_base.cost == 0)
5457 cost_base.cost = COSTS_N_INSNS (1);
5458 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5460 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5462 /* Prefer the original ivs unless we may gain something by replacing it.
5463 The reason is to make debugging simpler; so this is not relevant for
5464 artificial ivs created by other optimization passes. */
5465 if (cand->pos != IP_ORIGINAL
5466 || !SSA_NAME_VAR (cand->var_before)
5467 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5468 cost++;
5470 /* Prefer not to insert statements into the latch unless there are some
5471 already (so that we do not create unnecessary jumps). */
5472 if (cand->pos == IP_END
5473 && empty_block_p (ip_end_pos (data->current_loop)))
5474 cost++;
5476 cand->cost = cost;
5477 cand->cost_step = cost_step;
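/* Annotation (not part of the upstream source): e.g. with
   cost_base.cost == COSTS_N_INSNS (1) and add_cost == 1, a loop that
   rolls many times lets adjust_setup_cost scale the setup part down,
   so cand->cost ends up dominated by cost_step plus the small
   preference bumps above.  */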
5480 /* Determines costs of computation of the candidates. */
5482 static void
5483 determine_iv_costs (struct ivopts_data *data)
5485 unsigned i;
5487 if (dump_file && (dump_flags & TDF_DETAILS))
5489 fprintf (dump_file, "Candidate costs:\n");
5490 fprintf (dump_file, " cand\tcost\n");
5493 for (i = 0; i < n_iv_cands (data); i++)
5495 struct iv_cand *cand = iv_cand (data, i);
5497 determine_iv_cost (data, cand);
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5500 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5503 if (dump_file && (dump_flags & TDF_DETAILS))
5504 fprintf (dump_file, "\n");
5507 /* Calculates cost for having SIZE induction variables. */
5509 static unsigned
5510 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5512 /* We add size to the cost, so that we prefer eliminating ivs
5513 if possible. */
5514 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5515 data->body_includes_call);
5518 /* For each size of the induction variable set determine the penalty. */
5520 static void
5521 determine_set_costs (struct ivopts_data *data)
5523 unsigned j, n;
5524 gphi *phi;
5525 gphi_iterator psi;
5526 tree op;
5527 struct loop *loop = data->current_loop;
5528 bitmap_iterator bi;
5530 if (dump_file && (dump_flags & TDF_DETAILS))
5532 fprintf (dump_file, "Global costs:\n");
5533 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5534 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5535 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5536 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5539 n = 0;
5540 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5542 phi = psi.phi ();
5543 op = PHI_RESULT (phi);
5545 if (virtual_operand_p (op))
5546 continue;
5548 if (get_iv (data, op))
5549 continue;
5551 n++;
5554 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5556 struct version_info *info = ver_info (data, j);
5558 if (info->inv_id && info->has_nonlin_use)
5559 n++;
5562 data->regs_used = n;
5563 if (dump_file && (dump_flags & TDF_DETAILS))
5564 fprintf (dump_file, " regs_used %d\n", n);
5566 if (dump_file && (dump_flags & TDF_DETAILS))
5568 fprintf (dump_file, " cost for size:\n");
5569 fprintf (dump_file, " ivs\tcost\n");
5570 for (j = 0; j <= 2 * target_avail_regs; j++)
5571 fprintf (dump_file, " %d\t%d\n", j,
5572 ivopts_global_cost_for_size (data, j));
5573 fprintf (dump_file, "\n");
5577 /* Returns true if A is a cheaper cost pair than B. */
5579 static bool
5580 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5582 int cmp;
5584 if (!a)
5585 return false;
5587 if (!b)
5588 return true;
5590 cmp = compare_costs (a->cost, b->cost);
5591 if (cmp < 0)
5592 return true;
5594 if (cmp > 0)
5595 return false;
5597 /* In case the costs are the same, prefer the cheaper candidate. */
5598 if (a->cand->cost < b->cand->cost)
5599 return true;
5601 return false;
5605 /* Returns the candidate by which USE is expressed in IVS. */
5607 static struct cost_pair *
5608 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5610 return ivs->cand_for_use[use->id];
5613 /* Computes the cost field of IVS structure. */
5615 static void
5616 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5618 comp_cost cost = ivs->cand_use_cost;
5620 cost.cost += ivs->cand_cost;
5622 cost.cost += ivopts_global_cost_for_size (data,
5623 ivs->n_regs + ivs->num_used_inv_expr);
5625 ivs->cost = cost;
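/* Annotation (not part of the upstream source): the total is thus

     cand_use_cost   (sum of the chosen per-use costs)
   + cand_cost       (sum of increment costs of the used candidates)
   + register-pressure estimate for the n_regs + num_used_inv_expr
     values live across the loop.  */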
5628 /* Remove the invariants in set INVS from set IVS. */
5630 static void
5631 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5633 bitmap_iterator bi;
5634 unsigned iid;
5636 if (!invs)
5637 return;
5639 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5641 ivs->n_invariant_uses[iid]--;
5642 if (ivs->n_invariant_uses[iid] == 0)
5643 ivs->n_regs--;
5647 /* Set USE not to be expressed by any candidate in IVS. */
5649 static void
5650 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5651 struct iv_use *use)
5653 unsigned uid = use->id, cid;
5654 struct cost_pair *cp;
5656 cp = ivs->cand_for_use[uid];
5657 if (!cp)
5658 return;
5659 cid = cp->cand->id;
5661 ivs->bad_uses++;
5662 ivs->cand_for_use[uid] = NULL;
5663 ivs->n_cand_uses[cid]--;
5665 if (ivs->n_cand_uses[cid] == 0)
5667 bitmap_clear_bit (ivs->cands, cid);
5668 /* Do not count the pseudocandidates. */
5669 if (cp->cand->iv)
5670 ivs->n_regs--;
5671 ivs->n_cands--;
5672 ivs->cand_cost -= cp->cand->cost;
5674 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5677 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5679 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5681 if (cp->inv_expr_id != -1)
5683 ivs->used_inv_expr[cp->inv_expr_id]--;
5684 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5685 ivs->num_used_inv_expr--;
5687 iv_ca_recount_cost (data, ivs);
5690 /* Add invariants in set INVS to set IVS. */
5692 static void
5693 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5695 bitmap_iterator bi;
5696 unsigned iid;
5698 if (!invs)
5699 return;
5701 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5703 ivs->n_invariant_uses[iid]++;
5704 if (ivs->n_invariant_uses[iid] == 1)
5705 ivs->n_regs++;
5709 /* Set cost pair for USE in set IVS to CP. */
5711 static void
5712 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5713 struct iv_use *use, struct cost_pair *cp)
5715 unsigned uid = use->id, cid;
5717 if (ivs->cand_for_use[uid] == cp)
5718 return;
5720 if (ivs->cand_for_use[uid])
5721 iv_ca_set_no_cp (data, ivs, use);
5723 if (cp)
5725 cid = cp->cand->id;
5727 ivs->bad_uses--;
5728 ivs->cand_for_use[uid] = cp;
5729 ivs->n_cand_uses[cid]++;
5730 if (ivs->n_cand_uses[cid] == 1)
5732 bitmap_set_bit (ivs->cands, cid);
5733 /* Do not count the pseudocandidates. */
5734 if (cp->cand->iv)
5735 ivs->n_regs++;
5736 ivs->n_cands++;
5737 ivs->cand_cost += cp->cand->cost;
5739 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5742 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5743 iv_ca_set_add_invariants (ivs, cp->depends_on);
5745 if (cp->inv_expr_id != -1)
5747 ivs->used_inv_expr[cp->inv_expr_id]++;
5748 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5749 ivs->num_used_inv_expr++;
5751 iv_ca_recount_cost (data, ivs);
5755 /* Extend set IVS by expressing USE by some of the candidates in it
5756 if possible. Consider all important candidates if the candidates in
5757 set IVS don't give any result. */
5759 static void
5760 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5761 struct iv_use *use)
5763 struct cost_pair *best_cp = NULL, *cp;
5764 bitmap_iterator bi;
5765 unsigned i;
5766 struct iv_cand *cand;
5768 gcc_assert (ivs->upto >= use->id);
5769 ivs->upto++;
5770 ivs->bad_uses++;
5772 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5774 cand = iv_cand (data, i);
5775 cp = get_use_iv_cost (data, use, cand);
5776 if (cheaper_cost_pair (cp, best_cp))
5777 best_cp = cp;
5780 if (best_cp == NULL)
5782 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5784 cand = iv_cand (data, i);
5785 cp = get_use_iv_cost (data, use, cand);
5786 if (cheaper_cost_pair (cp, best_cp))
5787 best_cp = cp;
5791 iv_ca_set_cp (data, ivs, use, best_cp);
5794 /* Get cost for assignment IVS. */
5796 static comp_cost
5797 iv_ca_cost (struct iv_ca *ivs)
5799 /* This was a conditional expression but it triggered a bug in
5800 Sun C 5.5. */
5801 if (ivs->bad_uses)
5802 return infinite_cost;
5803 else
5804 return ivs->cost;
5807 /* Returns true if all dependences of CP are among invariants in IVS. */
5809 static bool
5810 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5812 unsigned i;
5813 bitmap_iterator bi;
5815 if (!cp->depends_on)
5816 return true;
5818 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5820 if (ivs->n_invariant_uses[i] == 0)
5821 return false;
5824 return true;
5827 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5828 it before NEXT_CHANGE. */
5830 static struct iv_ca_delta *
5831 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5832 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5834 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5836 change->use = use;
5837 change->old_cp = old_cp;
5838 change->new_cp = new_cp;
5839 change->next_change = next_change;
5841 return change;
5844 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5845 are rewritten. */
5847 static struct iv_ca_delta *
5848 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5850 struct iv_ca_delta *last;
5852 if (!l2)
5853 return l1;
5855 if (!l1)
5856 return l2;
5858 for (last = l1; last->next_change; last = last->next_change)
5859 continue;
5860 last->next_change = l2;
5862 return l1;
5865 /* Reverse the list of changes DELTA, forming the inverse to it. */
5867 static struct iv_ca_delta *
5868 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5870 struct iv_ca_delta *act, *next, *prev = NULL;
5872 for (act = delta; act; act = next)
5874 next = act->next_change;
5875 act->next_change = prev;
5876 prev = act;
5878 std::swap (act->old_cp, act->new_cp);
5881 return prev;
5884 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5885 reverted instead. */
5887 static void
5888 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5889 struct iv_ca_delta *delta, bool forward)
5891 struct cost_pair *from, *to;
5892 struct iv_ca_delta *act;
5894 if (!forward)
5895 delta = iv_ca_delta_reverse (delta);
5897 for (act = delta; act; act = act->next_change)
5899 from = act->old_cp;
5900 to = act->new_cp;
5901 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5902 iv_ca_set_cp (data, ivs, act->use, to);
5905 if (!forward)
5906 iv_ca_delta_reverse (delta);
5909 /* Returns true if CAND is used in IVS. */
5911 static bool
5912 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5914 return ivs->n_cand_uses[cand->id] > 0;
5917 /* Returns number of induction variable candidates in the set IVS. */
5919 static unsigned
5920 iv_ca_n_cands (struct iv_ca *ivs)
5922 return ivs->n_cands;
5925 /* Free the list of changes DELTA. */
5927 static void
5928 iv_ca_delta_free (struct iv_ca_delta **delta)
5930 struct iv_ca_delta *act, *next;
5932 for (act = *delta; act; act = next)
5934 next = act->next_change;
5935 free (act);
5938 *delta = NULL;
5941 /* Allocates new iv candidates assignment. */
5943 static struct iv_ca *
5944 iv_ca_new (struct ivopts_data *data)
5946 struct iv_ca *nw = XNEW (struct iv_ca);
5948 nw->upto = 0;
5949 nw->bad_uses = 0;
5950 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5951 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5952 nw->cands = BITMAP_ALLOC (NULL);
5953 nw->n_cands = 0;
5954 nw->n_regs = 0;
5955 nw->cand_use_cost = no_cost;
5956 nw->cand_cost = 0;
5957 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5958 nw->cost = no_cost;
5959 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5960 nw->num_used_inv_expr = 0;
5962 return nw;
5965 /* Free memory occupied by the set IVS. */
5967 static void
5968 iv_ca_free (struct iv_ca **ivs)
5970 free ((*ivs)->cand_for_use);
5971 free ((*ivs)->n_cand_uses);
5972 BITMAP_FREE ((*ivs)->cands);
5973 free ((*ivs)->n_invariant_uses);
5974 free ((*ivs)->used_inv_expr);
5975 free (*ivs);
5976 *ivs = NULL;
5979 /* Dumps IVS to FILE. */
5981 static void
5982 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5984 const char *pref = " invariants ";
5985 unsigned i;
5986 comp_cost cost = iv_ca_cost (ivs);
5988 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5989 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5990 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5991 bitmap_print (file, ivs->cands, " candidates: ","\n");
5993 for (i = 0; i < ivs->upto; i++)
5995 struct iv_use *use = iv_use (data, i);
5996 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5997 if (cp)
5998 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5999 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6000 else
6001 fprintf (file, " use:%d --> ??\n", use->id);
6004 for (i = 1; i <= data->max_inv_id; i++)
6005 if (ivs->n_invariant_uses[i])
6007 fprintf (file, "%s%d", pref, i);
6008 pref = ", ";
6010 fprintf (file, "\n\n");
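/* Annotation (not part of the upstream source): a dump produced by
   iv_ca_dump might look like this (all values hypothetical):

     cost: 17 (complexity 2)
     cand_cost: 8
     cand_use_cost: 5 (complexity 2)
     candidates: 1, 5
     use:0 --> iv_cand:5, cost=(2,1)
     use:1 --> iv_cand:1, cost=(3,1)
     invariants 4, 6  */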
6013 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6014 new set, and store differences in DELTA. Number of induction variables
6015 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6016 the function will try to find a solution with a minimal number of iv candidates. */
6018 static comp_cost
6019 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6020 struct iv_cand *cand, struct iv_ca_delta **delta,
6021 unsigned *n_ivs, bool min_ncand)
6023 unsigned i;
6024 comp_cost cost;
6025 struct iv_use *use;
6026 struct cost_pair *old_cp, *new_cp;
6028 *delta = NULL;
6029 for (i = 0; i < ivs->upto; i++)
6031 use = iv_use (data, i);
6032 old_cp = iv_ca_cand_for_use (ivs, use);
6034 if (old_cp
6035 && old_cp->cand == cand)
6036 continue;
6038 new_cp = get_use_iv_cost (data, use, cand);
6039 if (!new_cp)
6040 continue;
6042 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6043 continue;
6045 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6046 continue;
6048 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6051 iv_ca_delta_commit (data, ivs, *delta, true);
6052 cost = iv_ca_cost (ivs);
6053 if (n_ivs)
6054 *n_ivs = iv_ca_n_cands (ivs);
6055 iv_ca_delta_commit (data, ivs, *delta, false);
6057 return cost;
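/* Annotation (not part of the upstream source): note the
   commit/measure/rollback idiom above -- the delta is applied just
   long enough to compute the resulting cost and candidate count and
   is then reverted; the caller decides whether to commit it for real
   via iv_ca_delta_commit.  */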
6060 /* Try narrowing set IVS by removing CAND. Return the cost of
6061 the new set and store the differences in DELTA. START is
6062 the candidate with which we start narrowing. */
6064 static comp_cost
6065 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6066 struct iv_cand *cand, struct iv_cand *start,
6067 struct iv_ca_delta **delta)
6069 unsigned i, ci;
6070 struct iv_use *use;
6071 struct cost_pair *old_cp, *new_cp, *cp;
6072 bitmap_iterator bi;
6073 struct iv_cand *cnd;
6074 comp_cost cost, best_cost, acost;
6076 *delta = NULL;
6077 for (i = 0; i < n_iv_uses (data); i++)
6079 use = iv_use (data, i);
6081 old_cp = iv_ca_cand_for_use (ivs, use);
6082 if (old_cp->cand != cand)
6083 continue;
6085 best_cost = iv_ca_cost (ivs);
6086 /* Start narrowing with START. */
6087 new_cp = get_use_iv_cost (data, use, start);
6089 if (data->consider_all_candidates)
6091 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6093 if (ci == cand->id || (start && ci == start->id))
6094 continue;
6096 cnd = iv_cand (data, ci);
6098 cp = get_use_iv_cost (data, use, cnd);
6099 if (!cp)
6100 continue;
6102 iv_ca_set_cp (data, ivs, use, cp);
6103 acost = iv_ca_cost (ivs);
6105 if (compare_costs (acost, best_cost) < 0)
6107 best_cost = acost;
6108 new_cp = cp;
6112 else
6114 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6116 if (ci == cand->id || (start && ci == start->id))
6117 continue;
6119 cnd = iv_cand (data, ci);
6121 cp = get_use_iv_cost (data, use, cnd);
6122 if (!cp)
6123 continue;
6125 iv_ca_set_cp (data, ivs, use, cp);
6126 acost = iv_ca_cost (ivs);
6128 if (compare_costs (acost, best_cost) < 0)
6130 best_cost = acost;
6131 new_cp = cp;
6135 /* Restore to old cp for use. */
6136 iv_ca_set_cp (data, ivs, use, old_cp);
6138 if (!new_cp)
6140 iv_ca_delta_free (delta);
6141 return infinite_cost;
6144 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6147 iv_ca_delta_commit (data, ivs, *delta, true);
6148 cost = iv_ca_cost (ivs);
6149 iv_ca_delta_commit (data, ivs, *delta, false);
6151 return cost;
6154 /* Try optimizing the set of candidates IVS by removing candidates other
6155 than EXCEPT_CAND from it. Return cost of the new set, and store
6156 differences in DELTA. */
6158 static comp_cost
6159 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6160 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6162 bitmap_iterator bi;
6163 struct iv_ca_delta *act_delta, *best_delta;
6164 unsigned i;
6165 comp_cost best_cost, acost;
6166 struct iv_cand *cand;
6168 best_delta = NULL;
6169 best_cost = iv_ca_cost (ivs);
6171 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6173 cand = iv_cand (data, i);
6175 if (cand == except_cand)
6176 continue;
6178 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6180 if (compare_costs (acost, best_cost) < 0)
6182 best_cost = acost;
6183 iv_ca_delta_free (&best_delta);
6184 best_delta = act_delta;
6186 else
6187 iv_ca_delta_free (&act_delta);
6190 if (!best_delta)
6192 *delta = NULL;
6193 return best_cost;
6196 /* Recurse to possibly remove other unnecessary ivs. */
6197 iv_ca_delta_commit (data, ivs, best_delta, true);
6198 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6199 iv_ca_delta_commit (data, ivs, best_delta, false);
6200 *delta = iv_ca_delta_join (best_delta, *delta);
6201 return best_cost;
6204 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6205 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6206 the corresponding cost_pair; otherwise just return BEST_CP. */
6208 static struct cost_pair*
6209 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6210 unsigned int cand_idx, struct iv_cand *old_cand,
6211 struct cost_pair *best_cp)
6213 struct iv_cand *cand;
6214 struct cost_pair *cp;
6216 gcc_assert (old_cand != NULL && best_cp != NULL);
6217 if (cand_idx == old_cand->id)
6218 return best_cp;
6220 cand = iv_cand (data, cand_idx);
6221 cp = get_use_iv_cost (data, use, cand);
6222 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6223 return cp;
6225 return best_cp;
6228 /* Try breaking the local optimal fixed-point for IVS by replacing
6229 candidates which are used by more than one iv use. For each of those
6230 candidates, this function tries to represent iv uses under that
6231 candidate using other ones with lower local cost, then tries to prune
6232 the new set. If the new set has lower cost, it returns the new cost
6233 after recording the candidate replacements in list DELTA. */
6235 static comp_cost
6236 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6237 struct iv_ca_delta **delta)
6239 bitmap_iterator bi, bj;
6240 unsigned int i, j, k;
6241 struct iv_use *use;
6242 struct iv_cand *cand;
6243 comp_cost orig_cost, acost;
6244 struct iv_ca_delta *act_delta, *tmp_delta;
6245 struct cost_pair *old_cp, *best_cp = NULL;
6247 *delta = NULL;
6248 orig_cost = iv_ca_cost (ivs);
6250 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6252 if (ivs->n_cand_uses[i] == 1
6253 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6254 continue;
6256 cand = iv_cand (data, i);
6258 act_delta = NULL;
6259 /* Represent uses under current candidate using other ones with
6260 lower local cost. */
6261 for (j = 0; j < ivs->upto; j++)
6263 use = iv_use (data, j);
6264 old_cp = iv_ca_cand_for_use (ivs, use);
6266 if (old_cp->cand != cand)
6267 continue;
6269 best_cp = old_cp;
6270 if (data->consider_all_candidates)
6271 for (k = 0; k < n_iv_cands (data); k++)
6272 best_cp = cheaper_cost_with_cand (data, use, k,
6273 old_cp->cand, best_cp);
6274 else
6275 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6276 best_cp = cheaper_cost_with_cand (data, use, k,
6277 old_cp->cand, best_cp);
6279 if (best_cp == old_cp)
6280 continue;
6282 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6284 /* No use was re-pointed; no need for further pruning. */
6285 if (!act_delta)
6286 continue;
6288 /* Prune the new candidate set. */
6289 iv_ca_delta_commit (data, ivs, act_delta, true);
6290 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6291 iv_ca_delta_commit (data, ivs, act_delta, false);
6292 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6294 if (compare_costs (acost, orig_cost) < 0)
6296 *delta = act_delta;
6297 return acost;
6299 else
6300 iv_ca_delta_free (&act_delta);
6303 return orig_cost;
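/* Illustrative sketch, not part of the pass: the iv_ca_delta lists threaded
   through iv_ca_narrow, iv_ca_prune and iv_ca_replace act as an undo log.
   A change is described once, applied with a forward commit, measured, and
   reverted with a backward commit, so candidate assignments can be
   evaluated without copying the whole set.  A hypothetical minimal form: */
struct delta_sketch
{
  int use_id;                   /* Which use is re-pointed.  */
  int old_cand, new_cand;       /* Previous and proposed candidate.  */
  struct delta_sketch *next;    /* Chained like iv_ca_delta.  */
};

static void
delta_commit_sketch (int *assignment, struct delta_sketch *d, bool forward)
{
  for (; d; d = d->next)
    assignment[d->use_id] = forward ? d->new_cand : d->old_cand;
}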
6306 /* Tries to extend the set IVS in the best possible way in order
6307 to express the USE. If ORIGINALP is true, prefer candidates from
6308 the original set of IVs, otherwise favor important candidates not
6309 based on any memory object. */
6311 static bool
6312 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6313 struct iv_use *use, bool originalp)
6315 comp_cost best_cost, act_cost;
6316 unsigned i;
6317 bitmap_iterator bi;
6318 struct iv_cand *cand;
6319 struct iv_ca_delta *best_delta = NULL, *act_delta;
6320 struct cost_pair *cp;
6322 iv_ca_add_use (data, ivs, use);
6323 best_cost = iv_ca_cost (ivs);
6324 cp = iv_ca_cand_for_use (ivs, use);
6325 if (cp)
6327 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6328 iv_ca_set_no_cp (data, ivs, use);
6331 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6332 first try important candidates not based on any memory object. Only if
6333 this fails, try the specific ones. Rationale -- in loops with many
6334 variables the best choice often is to use just one generic biv. If we
6335 added here many ivs specific to the uses, the optimization algorithm later
6336 would be likely to get stuck in a local minimum, thus causing us to create
6337 too many ivs. The approach from few ivs to more seems more likely to be
6338 successful -- starting from few ivs, replacing an expensive use by a
6339 specific iv should always be a win. */
6340 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6342 cand = iv_cand (data, i);
6344 if (originalp && cand->pos != IP_ORIGINAL)
6345 continue;
6347 if (!originalp && cand->iv->base_object != NULL_TREE)
6348 continue;
6350 if (iv_ca_cand_used_p (ivs, cand))
6351 continue;
6353 cp = get_use_iv_cost (data, use, cand);
6354 if (!cp)
6355 continue;
6357 iv_ca_set_cp (data, ivs, use, cp);
6358 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6359 true);
6360 iv_ca_set_no_cp (data, ivs, use);
6361 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6363 if (compare_costs (act_cost, best_cost) < 0)
6365 best_cost = act_cost;
6367 iv_ca_delta_free (&best_delta);
6368 best_delta = act_delta;
6370 else
6371 iv_ca_delta_free (&act_delta);
6374 if (infinite_cost_p (best_cost))
6376 for (i = 0; i < use->n_map_members; i++)
6378 cp = use->cost_map + i;
6379 cand = cp->cand;
6380 if (!cand)
6381 continue;
6383 /* Already tried this. */
6384 if (cand->important)
6386 if (originalp && cand->pos == IP_ORIGINAL)
6387 continue;
6388 if (!originalp && cand->iv->base_object == NULL_TREE)
6389 continue;
6392 if (iv_ca_cand_used_p (ivs, cand))
6393 continue;
6395 act_delta = NULL;
6396 iv_ca_set_cp (data, ivs, use, cp);
6397 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6398 iv_ca_set_no_cp (data, ivs, use);
6399 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6400 cp, act_delta);
6402 if (compare_costs (act_cost, best_cost) < 0)
6404 best_cost = act_cost;
6406 if (best_delta)
6407 iv_ca_delta_free (&best_delta);
6408 best_delta = act_delta;
6410 else
6411 iv_ca_delta_free (&act_delta);
6415 iv_ca_delta_commit (data, ivs, best_delta, true);
6416 iv_ca_delta_free (&best_delta);
6418 return !infinite_cost_p (best_cost);
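/* Illustrative sketch, not part of the pass: try_add_cand_for above
   searches in two phases -- generic "important" candidates first, and the
   use-specific ones only when every generic choice has infinite cost.  All
   names below are hypothetical; INT_MAX (from <limits.h>) stands in for
   infinite_cost.  */
static bool
two_phase_pick_sketch (int *out_choice,
                       const int *generic, int n_generic,
                       const int *specific, int n_specific,
                       int (*cand_cost) (int cand))
{
  int best = INT_MAX;

  /* Phase 1: prefer a few generic ivs; starting from them avoids the
     local minima that many use-specific ivs would create.  */
  for (int i = 0; i < n_generic; i++)
    if (cand_cost (generic[i]) < best)
      {
        best = cand_cost (generic[i]);
        *out_choice = generic[i];
      }

  /* Phase 2: fall back to use-specific candidates only on failure.  */
  if (best == INT_MAX)
    for (int i = 0; i < n_specific; i++)
      if (cand_cost (specific[i]) < best)
        {
          best = cand_cost (specific[i]);
          *out_choice = specific[i];
        }

  return best != INT_MAX;
}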
6421 /* Finds an initial assignment of candidates to uses. */
6423 static struct iv_ca *
6424 get_initial_solution (struct ivopts_data *data, bool originalp)
6426 struct iv_ca *ivs = iv_ca_new (data);
6427 unsigned i;
6429 for (i = 0; i < n_iv_uses (data); i++)
6430 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6432 iv_ca_free (&ivs);
6433 return NULL;
6436 return ivs;
6439 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6440 points to a bool variable; if it is true, this function also tries
6441 to break a locally optimal fixed point by replacing candidates in IVS. */
6443 static bool
6444 try_improve_iv_set (struct ivopts_data *data,
6445 struct iv_ca *ivs, bool *try_replace_p)
6447 unsigned i, n_ivs;
6448 comp_cost acost, best_cost = iv_ca_cost (ivs);
6449 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6450 struct iv_cand *cand;
6452 /* Try extending the set of induction variables by one. */
6453 for (i = 0; i < n_iv_cands (data); i++)
6455 cand = iv_cand (data, i);
6457 if (iv_ca_cand_used_p (ivs, cand))
6458 continue;
6460 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6461 if (!act_delta)
6462 continue;
6464 /* If we successfully added the candidate and the set is small enough,
6465 try optimizing it by removing other candidates. */
6466 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6468 iv_ca_delta_commit (data, ivs, act_delta, true);
6469 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6470 iv_ca_delta_commit (data, ivs, act_delta, false);
6471 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6474 if (compare_costs (acost, best_cost) < 0)
6476 best_cost = acost;
6477 iv_ca_delta_free (&best_delta);
6478 best_delta = act_delta;
6480 else
6481 iv_ca_delta_free (&act_delta);
6484 if (!best_delta)
6486 /* Try removing the candidates from the set instead. */
6487 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6489 if (!best_delta && *try_replace_p)
6491 *try_replace_p = false;
6492 /* So far the candidate-selection algorithm tends to choose fewer IVs
6493 so that it can handle cases in which loops have many variables
6494 but the best choice is often to use only one general biv. One
6495 weakness is that it can't handle the opposite case, in which different
6496 candidates should be chosen with respect to each use. To solve
6497 this problem, we replace candidates in the manner described in the
6498 comment of iv_ca_replace, thus giving the general algorithm a chance
6499 to break the locally optimal fixed point in these cases. */
6500 best_cost = iv_ca_replace (data, ivs, &best_delta);
6503 if (!best_delta)
6504 return false;
6507 iv_ca_delta_commit (data, ivs, best_delta, true);
6508 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6509 iv_ca_delta_free (&best_delta);
6510 return true;
6513 /* Attempts to find the optimal set of induction variables. We use a
6514 simple greedy heuristic -- we try to replace at most one candidate in the
6515 selected solution and remove the unused ivs while this improves the cost. */
6517 static struct iv_ca *
6518 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6520 struct iv_ca *set;
6521 bool try_replace_p = true;
6523 /* Get the initial solution. */
6524 set = get_initial_solution (data, originalp);
6525 if (!set)
6527 if (dump_file && (dump_flags & TDF_DETAILS))
6528 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6529 return NULL;
6532 if (dump_file && (dump_flags & TDF_DETAILS))
6534 fprintf (dump_file, "Initial set of candidates:\n");
6535 iv_ca_dump (data, dump_file, set);
6538 while (try_improve_iv_set (data, set, &try_replace_p))
6540 if (dump_file && (dump_flags & TDF_DETAILS))
6542 fprintf (dump_file, "Improved to:\n");
6543 iv_ca_dump (data, dump_file, set);
6547 return set;
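/* Illustrative sketch, not part of the pass: together, find_optimal_iv_set_1
   and try_improve_iv_set amount to the driver below -- iterate, escalating
   through three moves per round and stopping at the first local optimum.
   The state type and the three step callbacks are hypothetical; each
   callback returns true iff it found and applied a cost-lowering change.  */
static void
improve_to_fixpoint_sketch (void *state,
                            bool (*extend_step) (void *),
                            bool (*prune_step) (void *),
                            bool (*replace_step) (void *))
{
  bool try_replace = true;
  for (;;)
    {
      if (extend_step (state))          /* Add one candidate (then prune).  */
        continue;
      if (prune_step (state))           /* Drop unneeded candidates.  */
        continue;
      if (try_replace)
        {
          try_replace = false;          /* Replacement is attempted once.  */
          if (replace_step (state))
            continue;
        }
      return;                           /* No move improves: local optimum.  */
    }
}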
6550 static struct iv_ca *
6551 find_optimal_iv_set (struct ivopts_data *data)
6553 unsigned i;
6554 struct iv_ca *set, *origset;
6555 struct iv_use *use;
6556 comp_cost cost, origcost;
6558 /* Determine the cost based on a strategy that starts with the original
6559 IVs, and then try again using a strategy that prefers candidates not
6560 based on any memory object. */
6561 origset = find_optimal_iv_set_1 (data, true);
6562 set = find_optimal_iv_set_1 (data, false);
6564 if (!origset && !set)
6565 return NULL;
6567 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6568 cost = set ? iv_ca_cost (set) : infinite_cost;
6570 if (dump_file && (dump_flags & TDF_DETAILS))
6572 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6573 origcost.cost, origcost.complexity);
6574 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6575 cost.cost, cost.complexity);
6578 /* Choose the one with the best cost. */
6579 if (compare_costs (origcost, cost) <= 0)
6581 if (set)
6582 iv_ca_free (&set);
6583 set = origset;
6585 else if (origset)
6586 iv_ca_free (&origset);
6588 for (i = 0; i < n_iv_uses (data); i++)
6590 use = iv_use (data, i);
6591 use->selected = iv_ca_cand_for_use (set, use)->cand;
6594 return set;
6597 /* Creates a new induction variable corresponding to CAND. */
6599 static void
6600 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6602 gimple_stmt_iterator incr_pos;
6603 tree base;
6604 bool after = false;
6606 if (!cand->iv)
6607 return;
6609 switch (cand->pos)
6611 case IP_NORMAL:
6612 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6613 break;
6615 case IP_END:
6616 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6617 after = true;
6618 break;
6620 case IP_AFTER_USE:
6621 after = true;
6622 /* fall through */
6623 case IP_BEFORE_USE:
6624 incr_pos = gsi_for_stmt (cand->incremented_at);
6625 break;
6627 case IP_ORIGINAL:
6628 /* Mark that the iv is preserved. */
6629 name_info (data, cand->var_before)->preserve_biv = true;
6630 name_info (data, cand->var_after)->preserve_biv = true;
6632 /* Rewrite the increment so that it uses var_before directly. */
6633 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6634 return;
6637 gimple_add_tmp_var (cand->var_before);
6639 base = unshare_expr (cand->iv->base);
6641 create_iv (base, unshare_expr (cand->iv->step),
6642 cand->var_before, data->current_loop,
6643 &incr_pos, after, &cand->var_before, &cand->var_after);
6646 /* Creates new induction variables described in SET. */
6648 static void
6649 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6651 unsigned i;
6652 struct iv_cand *cand;
6653 bitmap_iterator bi;
6655 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6657 cand = iv_cand (data, i);
6658 create_new_iv (data, cand);
6661 if (dump_file && (dump_flags & TDF_DETAILS))
6663 fprintf (dump_file, "Selected IV set for loop %d",
6664 data->current_loop->num);
6665 if (data->loop_loc != UNKNOWN_LOCATION)
6666 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6667 LOCATION_LINE (data->loop_loc));
6668 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6669 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6671 cand = iv_cand (data, i);
6672 dump_cand (dump_file, cand);
6674 fprintf (dump_file, "\n");
6678 /* Rewrites USE (definition of iv used in a nonlinear expression)
6679 using candidate CAND. */
6681 static void
6682 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6683 struct iv_use *use, struct iv_cand *cand)
6685 tree comp;
6686 tree op, tgt;
6687 gassign *ass;
6688 gimple_stmt_iterator bsi;
6690 /* An important special case -- if we are asked to express the value of
6691 the original iv by itself, just exit; there is no need to
6692 introduce a new computation (that might also need casting the
6693 variable to unsigned and back). */
6694 if (cand->pos == IP_ORIGINAL
6695 && cand->incremented_at == use->stmt)
6697 enum tree_code stmt_code;
6699 gcc_assert (is_gimple_assign (use->stmt));
6700 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6702 /* Check whether we may leave the computation unchanged.
6703 This is the case only if it does not rely on other
6704 computations in the loop -- otherwise, the computation
6705 we rely upon may be removed in remove_unused_ivs,
6706 thus leading to an ICE. */
6707 stmt_code = gimple_assign_rhs_code (use->stmt);
6708 if (stmt_code == PLUS_EXPR
6709 || stmt_code == MINUS_EXPR
6710 || stmt_code == POINTER_PLUS_EXPR)
6712 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6713 op = gimple_assign_rhs2 (use->stmt);
6714 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6715 op = gimple_assign_rhs1 (use->stmt);
6716 else
6717 op = NULL_TREE;
6719 else
6720 op = NULL_TREE;
6722 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6723 return;
6726 comp = get_computation (data->current_loop, use, cand);
6727 gcc_assert (comp != NULL_TREE);
6729 switch (gimple_code (use->stmt))
6731 case GIMPLE_PHI:
6732 tgt = PHI_RESULT (use->stmt);
6734 /* If we should keep the biv, do not replace it. */
6735 if (name_info (data, tgt)->preserve_biv)
6736 return;
6738 bsi = gsi_after_labels (gimple_bb (use->stmt));
6739 break;
6741 case GIMPLE_ASSIGN:
6742 tgt = gimple_assign_lhs (use->stmt);
6743 bsi = gsi_for_stmt (use->stmt);
6744 break;
6746 default:
6747 gcc_unreachable ();
6750 if (!valid_gimple_rhs_p (comp)
6751 || (gimple_code (use->stmt) != GIMPLE_PHI
6752 /* We can't allow the stmt to be reallocated, as it might
6753 still be pointed to. */
6754 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6755 >= gimple_num_ops (gsi_stmt (bsi)))))
6757 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6758 true, GSI_SAME_STMT);
6759 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6761 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6762 /* As this isn't a plain copy we have to reset alignment
6763 information. */
6764 if (SSA_NAME_PTR_INFO (comp))
6765 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6769 if (gimple_code (use->stmt) == GIMPLE_PHI)
6771 ass = gimple_build_assign (tgt, comp);
6772 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6774 bsi = gsi_for_stmt (use->stmt);
6775 remove_phi_node (&bsi, false);
6777 else
6779 gimple_assign_set_rhs_from_tree (&bsi, comp);
6780 use->stmt = gsi_stmt (bsi);
6784 /* Performs a peephole optimization to reorder the iv update statement with
6785 a mem ref to enable instruction combining in later phases. The mem ref uses
6786 the iv value before the update, so the reordering transformation requires
6787 adjustment of the offset. CAND is the selected IV_CAND.
6789 Example:
6791 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6792 iv2 = iv1 + 1;
6794 if (t < val) (1)
6795 goto L;
6796 goto Head;
6799 directly propagating t over to (1) would introduce an overlapping live
6800 range and thus increase register pressure. This peephole transforms it into:
6803 iv2 = iv1 + 1;
6804 t = MEM_REF (base, iv2, 8, 8);
6805 if (t < val)
6806 goto L;
6807 goto Head;
6810 static void
6811 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6813 tree var_after;
6814 gimple iv_update, stmt;
6815 basic_block bb;
6816 gimple_stmt_iterator gsi, gsi_iv;
6818 if (cand->pos != IP_NORMAL)
6819 return;
6821 var_after = cand->var_after;
6822 iv_update = SSA_NAME_DEF_STMT (var_after);
6824 bb = gimple_bb (iv_update);
6825 gsi = gsi_last_nondebug_bb (bb);
6826 stmt = gsi_stmt (gsi);
6828 /* Only handle a conditional statement for now. */
6829 if (gimple_code (stmt) != GIMPLE_COND)
6830 return;
6832 gsi_prev_nondebug (&gsi);
6833 stmt = gsi_stmt (gsi);
6834 if (stmt != iv_update)
6835 return;
6837 gsi_prev_nondebug (&gsi);
6838 if (gsi_end_p (gsi))
6839 return;
6841 stmt = gsi_stmt (gsi);
6842 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6843 return;
6845 if (stmt != use->stmt)
6846 return;
6848 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6849 return;
6851 if (dump_file && (dump_flags & TDF_DETAILS))
6853 fprintf (dump_file, "Reordering \n");
6854 print_gimple_stmt (dump_file, iv_update, 0, 0);
6855 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6856 fprintf (dump_file, "\n");
6859 gsi = gsi_for_stmt (use->stmt);
6860 gsi_iv = gsi_for_stmt (iv_update);
6861 gsi_move_before (&gsi_iv, &gsi);
6863 cand->pos = IP_BEFORE_USE;
6864 cand->incremented_at = use->stmt;
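/* Illustrative sketch, not part of the pass: at the source level the
   reordering performed above corresponds to the hypothetical rewrite
   below (both functions assume n >= 1 and compute the same sum).  Using
   the incremented iv with a compensated offset keeps the loaded value's
   live range from overlapping the iv update, as in the MEM_REF example
   in the comment before adjust_iv_update_pos.  */
static int
load_before_update_sketch (const int *p, int n)
{
  int i = 0, s = 0, t;
  do
    {
      t = p[i];                 /* Mem ref uses the iv value before the update.  */
      i = i + 1;                /* iv update after the mem ref.  */
      s += t;
    }
  while (t && i < n);
  return s;
}

static int
load_after_update_sketch (const int *p, int n)
{
  int i = 0, s = 0, t;
  do
    {
      i = i + 1;                /* iv update moved before the mem ref.  */
      t = p[i - 1];             /* Offset adjusted to compensate.  */
      s += t;
    }
  while (t && i < n);
  return s;
}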
6867 /* Rewrites USE (address that is an iv) using candidate CAND. */
6869 static void
6870 rewrite_use_address_1 (struct ivopts_data *data,
6871 struct iv_use *use, struct iv_cand *cand)
6873 aff_tree aff;
6874 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6875 tree base_hint = NULL_TREE;
6876 tree ref, iv;
6877 bool ok;
6879 adjust_iv_update_pos (cand, use);
6880 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6881 gcc_assert (ok);
6882 unshare_aff_combination (&aff);
6884 /* To avoid undefined overflow problems, all IV candidates use unsigned
6885 integer types. The drawback is that this makes it impossible for
6886 create_mem_ref to distinguish an IV that is based on a memory object
6887 from one that represents simply an offset.
6889 To work around this problem, we pass a hint to create_mem_ref that
6890 indicates which variable (if any) in aff is an IV based on a memory
6891 object. Note that we only consider the candidate. If this is not
6892 based on an object, the base of the reference is in some subexpression
6893 of the use -- but these will use pointer types, so they are recognized
6894 by the create_mem_ref heuristics anyway. */
6895 if (cand->iv->base_object)
6896 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6898 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6899 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6900 reference_alias_ptr_type (*use->op_p),
6901 iv, base_hint, data->speed);
6902 copy_ref_info (ref, *use->op_p);
6903 *use->op_p = ref;
6906 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6907 first use of a group, rewrites sub uses in the group too. */
6909 static void
6910 rewrite_use_address (struct ivopts_data *data,
6911 struct iv_use *use, struct iv_cand *cand)
6913 struct iv_use *next;
6915 gcc_assert (use->sub_id == 0);
6916 rewrite_use_address_1 (data, use, cand);
6917 update_stmt (use->stmt);
6919 for (next = use->next; next != NULL; next = next->next)
6921 rewrite_use_address_1 (data, next, cand);
6922 update_stmt (next->stmt);
6925 return;
6928 /* Rewrites USE (a condition in which one of the arguments is an iv)
6929 using candidate CAND. */
6931 static void
6932 rewrite_use_compare (struct ivopts_data *data,
6933 struct iv_use *use, struct iv_cand *cand)
6935 tree comp, *var_p, op, bound;
6936 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6937 enum tree_code compare;
6938 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6939 bool ok;
6941 bound = cp->value;
6942 if (bound)
6944 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6945 tree var_type = TREE_TYPE (var);
6946 gimple_seq stmts;
6948 if (dump_file && (dump_flags & TDF_DETAILS))
6950 fprintf (dump_file, "Replacing exit test: ");
6951 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6953 compare = cp->comp;
6954 bound = unshare_expr (fold_convert (var_type, bound));
6955 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6956 if (stmts)
6957 gsi_insert_seq_on_edge_immediate (
6958 loop_preheader_edge (data->current_loop),
6959 stmts);
6961 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6962 gimple_cond_set_lhs (cond_stmt, var);
6963 gimple_cond_set_code (cond_stmt, compare);
6964 gimple_cond_set_rhs (cond_stmt, op);
6965 return;
6968 /* The induction variable elimination failed; just express the original
6969 giv. */
6970 comp = get_computation (data->current_loop, use, cand);
6971 gcc_assert (comp != NULL_TREE);
6973 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6974 gcc_assert (ok);
6976 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6977 true, GSI_SAME_STMT);
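/* Illustrative sketch, not part of the pass: when a bound is available,
   rewrite_use_compare above replaces an exit test on one iv with a test of
   the selected candidate against a bound computed once in the loop
   preheader.  A hypothetical source-level view (assumes n >= 0): */
static void
zero_array_before_sketch (int *a, int n)
{
  for (int i = 0; i < n; i++)           /* Exit test compares the index iv.  */
    a[i] = 0;
}

static void
zero_array_after_sketch (int *a, int n)
{
  int *bound = a + n;                   /* Bound computed in the preheader.  */
  for (int *p = a; p != bound; p++)     /* Test rewritten onto the candidate.  */
    *p = 0;
}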
6980 /* Rewrites USE using candidate CAND. */
6982 static void
6983 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6985 switch (use->type)
6987 case USE_NONLINEAR_EXPR:
6988 rewrite_use_nonlinear_expr (data, use, cand);
6989 break;
6991 case USE_ADDRESS:
6992 rewrite_use_address (data, use, cand);
6993 break;
6995 case USE_COMPARE:
6996 rewrite_use_compare (data, use, cand);
6997 break;
6999 default:
7000 gcc_unreachable ();
7003 update_stmt (use->stmt);
7006 /* Rewrite the uses using the selected induction variables. */
7008 static void
7009 rewrite_uses (struct ivopts_data *data)
7011 unsigned i;
7012 struct iv_cand *cand;
7013 struct iv_use *use;
7015 for (i = 0; i < n_iv_uses (data); i++)
7017 use = iv_use (data, i);
7018 cand = use->selected;
7019 gcc_assert (cand);
7021 rewrite_use (data, use, cand);
7025 /* Removes the ivs that are not used after rewriting. */
7027 static void
7028 remove_unused_ivs (struct ivopts_data *data)
7030 unsigned j;
7031 bitmap_iterator bi;
7032 bitmap toremove = BITMAP_ALLOC (NULL);
7034 /* Figure out an order in which to release SSA DEFs so that we don't
7035 release something that we'd have to propagate into a debug stmt
7036 afterwards. */
7037 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7039 struct version_info *info;
7041 info = ver_info (data, j);
7042 if (info->iv
7043 && !integer_zerop (info->iv->step)
7044 && !info->inv_id
7045 && !info->iv->have_use_for
7046 && !info->preserve_biv)
7048 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7050 tree def = info->iv->ssa_name;
7052 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7054 imm_use_iterator imm_iter;
7055 use_operand_p use_p;
7056 gimple stmt;
7057 int count = 0;
7059 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7061 if (!gimple_debug_bind_p (stmt))
7062 continue;
7064 /* We just want to determine whether to do nothing
7065 (count == 0), to substitute the computed
7066 expression into a single use of the SSA DEF by
7067 itself (count == 1), or to use a debug temp
7068 because the SSA DEF is used multiple times or as
7069 part of a larger expression (count > 1). */
7070 count++;
7071 if (gimple_debug_bind_get_value (stmt) != def)
7072 count++;
7074 if (count > 1)
7075 BREAK_FROM_IMM_USE_STMT (imm_iter);
7078 if (!count)
7079 continue;
7081 struct iv_use dummy_use;
7082 struct iv_cand *best_cand = NULL, *cand;
7083 unsigned i, best_pref = 0, cand_pref;
7085 memset (&dummy_use, 0, sizeof (dummy_use));
7086 dummy_use.iv = info->iv;
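/* Rank the selected candidates for the debug replacement: prefer a
   candidate with the same step (weight 4), then one whose base has the
   same machine mode (weight 2), then one with a constant base (weight 1).  */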
7087 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7089 cand = iv_use (data, i)->selected;
7090 if (cand == best_cand)
7091 continue;
7092 cand_pref = operand_equal_p (cand->iv->step,
7093 info->iv->step, 0)
7094 ? 4 : 0;
7095 cand_pref
7096 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7097 == TYPE_MODE (TREE_TYPE (info->iv->base))
7098 ? 2 : 0;
7099 cand_pref
7100 += TREE_CODE (cand->iv->base) == INTEGER_CST
7101 ? 1 : 0;
7102 if (best_cand == NULL || best_pref < cand_pref)
7104 best_cand = cand;
7105 best_pref = cand_pref;
7109 if (!best_cand)
7110 continue;
7112 tree comp = get_computation_at (data->current_loop,
7113 &dummy_use, best_cand,
7114 SSA_NAME_DEF_STMT (def));
7115 if (!comp)
7116 continue;
7118 if (count > 1)
7120 tree vexpr = make_node (DEBUG_EXPR_DECL);
7121 DECL_ARTIFICIAL (vexpr) = 1;
7122 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7123 if (SSA_NAME_VAR (def))
7124 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7125 else
7126 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7127 gdebug *def_temp
7128 = gimple_build_debug_bind (vexpr, comp, NULL);
7129 gimple_stmt_iterator gsi;
7131 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7132 gsi = gsi_after_labels (gimple_bb
7133 (SSA_NAME_DEF_STMT (def)));
7134 else
7135 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7137 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7138 comp = vexpr;
7141 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7143 if (!gimple_debug_bind_p (stmt))
7144 continue;
7146 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7147 SET_USE (use_p, comp);
7149 update_stmt (stmt);
7155 release_defs_bitset (toremove);
7157 BITMAP_FREE (toremove);
7160 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7161 for hash_map::traverse. */
7163 bool
7164 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7166 free (value);
7167 return true;
7170 /* Frees data allocated by the optimization of a single loop. */
7172 static void
7173 free_loop_data (struct ivopts_data *data)
7175 unsigned i, j;
7176 bitmap_iterator bi;
7177 tree obj;
7179 if (data->niters)
7181 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7182 delete data->niters;
7183 data->niters = NULL;
7186 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7188 struct version_info *info;
7190 info = ver_info (data, i);
7191 free (info->iv);
7192 info->iv = NULL;
7193 info->has_nonlin_use = false;
7194 info->preserve_biv = false;
7195 info->inv_id = 0;
7197 bitmap_clear (data->relevant);
7198 bitmap_clear (data->important_candidates);
7200 for (i = 0; i < n_iv_uses (data); i++)
7202 struct iv_use *use = iv_use (data, i);
7203 struct iv_use *pre = use, *sub = use->next;
7205 while (sub)
7207 gcc_assert (sub->related_cands == NULL);
7208 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7210 free (sub->iv);
7211 pre = sub;
7212 sub = sub->next;
7213 free (pre);
7216 free (use->iv);
7217 BITMAP_FREE (use->related_cands);
7218 for (j = 0; j < use->n_map_members; j++)
7219 if (use->cost_map[j].depends_on)
7220 BITMAP_FREE (use->cost_map[j].depends_on);
7221 free (use->cost_map);
7222 free (use);
7224 data->iv_uses.truncate (0);
7226 for (i = 0; i < n_iv_cands (data); i++)
7228 struct iv_cand *cand = iv_cand (data, i);
7230 free (cand->iv);
7231 if (cand->depends_on)
7232 BITMAP_FREE (cand->depends_on);
7233 free (cand);
7235 data->iv_candidates.truncate (0);
7237 if (data->version_info_size < num_ssa_names)
7239 data->version_info_size = 2 * num_ssa_names;
7240 free (data->version_info);
7241 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7244 data->max_inv_id = 0;
7246 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7247 SET_DECL_RTL (obj, NULL_RTX);
7249 decl_rtl_to_reset.truncate (0);
7251 data->inv_expr_tab->empty ();
7252 data->inv_expr_id = 0;
7255 /* Finalizes the data structures used by the iv optimization pass. */
7258 static void
7259 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7261 free_loop_data (data);
7262 free (data->version_info);
7263 BITMAP_FREE (data->relevant);
7264 BITMAP_FREE (data->important_candidates);
7266 decl_rtl_to_reset.release ();
7267 data->iv_uses.release ();
7268 data->iv_candidates.release ();
7269 delete data->inv_expr_tab;
7270 data->inv_expr_tab = NULL;
7271 free_affine_expand_cache (&data->name_expansion_cache);
7274 /* Returns true if the loop body BODY includes any calls, not counting inexpensive builtins. */
7276 static bool
7277 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7279 gimple_stmt_iterator gsi;
7280 unsigned i;
7282 for (i = 0; i < num_nodes; i++)
7283 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7285 gimple stmt = gsi_stmt (gsi);
7286 if (is_gimple_call (stmt)
7287 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7288 return true;
7290 return false;
7293 /* Optimizes the LOOP. Returns true if anything changed. */
7295 static bool
7296 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7298 bool changed = false;
7299 struct iv_ca *iv_ca;
7300 edge exit = single_dom_exit (loop);
7301 basic_block *body;
7303 gcc_assert (!data->niters);
7304 data->current_loop = loop;
7305 data->loop_loc = find_loop_location (loop);
7306 data->speed = optimize_loop_for_speed_p (loop);
7308 if (dump_file && (dump_flags & TDF_DETAILS))
7310 fprintf (dump_file, "Processing loop %d", loop->num);
7311 if (data->loop_loc != UNKNOWN_LOCATION)
7312 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7313 LOCATION_LINE (data->loop_loc));
7314 fprintf (dump_file, "\n");
7316 if (exit)
7318 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7319 exit->src->index, exit->dest->index);
7320 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7321 fprintf (dump_file, "\n");
7324 fprintf (dump_file, "\n");
7327 body = get_loop_body (loop);
7328 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7329 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7330 free (body);
7332 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7334 /* For each ssa name, determine whether it behaves as an induction variable
7335 in some loop. */
7336 if (!find_induction_variables (data))
7337 goto finish;
7339 /* Finds interesting uses (item 1). */
7340 find_interesting_uses (data);
7341 group_address_uses (data);
7342 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7343 goto finish;
7345 /* Finds candidates for the induction variables (item 2). */
7346 find_iv_candidates (data);
7348 /* Calculates the costs (item 3, part 1). */
7349 determine_iv_costs (data);
7350 determine_use_iv_costs (data);
7351 determine_set_costs (data);
7353 /* Find the optimal set of induction variables (item 3, part 2). */
7354 iv_ca = find_optimal_iv_set (data);
7355 if (!iv_ca)
7356 goto finish;
7357 changed = true;
7359 /* Create the new induction variables (item 4, part 1). */
7360 create_new_ivs (data, iv_ca);
7361 iv_ca_free (&iv_ca);
7363 /* Rewrite the uses (item 4, part 2). */
7364 rewrite_uses (data);
7366 /* Remove the ivs that are unused after rewriting. */
7367 remove_unused_ivs (data);
7369 /* We have changed the structure of induction variables; it might happen
7370 that definitions in the scev database refer to some of them that were
7371 eliminated. */
7372 scev_reset ();
7374 finish:
7375 free_loop_data (data);
7377 return changed;
7380 /* Main entry point. Optimizes induction variables in loops. */
7382 void
7383 tree_ssa_iv_optimize (void)
7385 struct loop *loop;
7386 struct ivopts_data data;
7388 tree_ssa_iv_optimize_init (&data);
7390 /* Optimize the loops starting with the innermost ones. */
7391 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7393 if (dump_file && (dump_flags & TDF_DETAILS))
7394 flow_loop_dump (loop, dump_file, NULL, 1);
7396 tree_ssa_iv_optimize_loop (&data, loop);
7399 tree_ssa_iv_optimize_finalize (&data);