/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
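
/* For illustration (an editor's sketch, not part of the original sources;
   assuming 4-byte array elements): in

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address use a[i] is computed as *(a + 4*i).  Strength reduction can
   introduce a pointer candidate p with base &a[0] and step 4 and rewrite
   the use as *p, and if i serves no other interesting use, iv elimination
   can rewrite the exit test i < n as p < &a[n], removing i entirely.  */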
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use, if the iv has one.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
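
/* For illustration (an editor's sketch, not from the original sources): a
   use that needs an extra addition on top of a candidate's value might be
   accounted as {add_cost, 1}, while one expressible directly through a
   legal [base + index] addressing mode might get {0, 1} -- free at
   runtime, with the complexity field still recording how involved the
   addressing mode is.  Combinations whose cost reaches infinite_cost are
   never selected.  */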
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
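
/* For illustration (an editor's sketch, not from the original sources):
   with IP_NORMAL the increment is emitted at the end of the loop body,
   right before the exit condition

       i.next = i + step;
       if (i.next < bound) goto loop; else goto exit;

   while IP_END emits it at the end of the latch block, after the exit
   test has already been evaluated.  IP_BEFORE_USE and IP_AFTER_USE place
   the increment next to the use the candidate was created for (see
   ainc_use in struct iv_cand below).  */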
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
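
/* For illustration (an editor's sketch, not from the original sources):
   contain_complex_addr_expr (&a) is false because the ADDR_EXPR operand is
   the declaration itself, while contain_complex_addr_expr (&a[0] + n) is
   true because &a[0] takes the address of an ARRAY_REF.  */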
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var, but doing that is
	 expensive and such cases are unusual, so we just do it for the
	 first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but doing that is
     expensive and such cases are unusual, so we just do it for the
     first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type uses, ADDR_BASE is the stripped IV base and ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Reset the ssa name to avoid showing it in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
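
/* For illustration (an editor's sketch, not from the original sources):
   with TOP = n*4 - n and BOT = n, the MINUS_EXPR case computes 4 - 1 and
   stores 3 in *MUL; with TOP = 12 and BOT = 4, the INTEGER_CST case stores
   3; with TOP = 13 and BOT = 4 the truncating division leaves a remainder,
   so the function returns false.  */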
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
2155 /* Finds interesting uses of induction variables outside of loops
2156 on loop exit edge EXIT. */
2158 static void
2159 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2161 gphi *phi;
2162 gphi_iterator psi;
2163 tree def;
2165 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2167 phi = psi.phi ();
2168 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2169 if (!virtual_operand_p (def))
2170 find_interesting_uses_op (data, def);
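/* Illustrative example (assumed code, for exposition): in

     for (i = 0; i < n; i++)
       s += a[i];
     use (s);

   the final value of s is carried out of the loop by a PHI node in the
   exit block; its argument on the exit edge is recorded here so that
   the value can still be computed once the ivs are rewritten.  */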
2174 /* Finds uses of the induction variables that are interesting. */
2176 static void
2177 find_interesting_uses (struct ivopts_data *data)
2179 basic_block bb;
2180 gimple_stmt_iterator bsi;
2181 basic_block *body = get_loop_body (data->current_loop);
2182 unsigned i;
2183 struct version_info *info;
2184 edge e;
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 fprintf (dump_file, "Uses:\n\n");
2189 for (i = 0; i < data->current_loop->num_nodes; i++)
2191 edge_iterator ei;
2192 bb = body[i];
2194 FOR_EACH_EDGE (e, ei, bb->succs)
2195 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2196 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2197 find_interesting_uses_outside (data, e);
2199 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2200 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2201 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2202 if (!is_gimple_debug (gsi_stmt (bsi)))
2203 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2206 if (dump_file && (dump_flags & TDF_DETAILS))
2208 bitmap_iterator bi;
2210 fprintf (dump_file, "\n");
2212 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2214 info = ver_info (data, i);
2215 if (info->inv_id)
2217 fprintf (dump_file, " ");
2218 print_generic_expr (dump_file, info->name, TDF_SLIM);
2219 fprintf (dump_file, " is invariant (%d)%s\n",
2220 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2224 fprintf (dump_file, "\n");
2227 free (body);
2230 /* Compute the maximum offset of the [base + offset] addressing mode
2231 for the memory reference represented by USE. */
2233 static HOST_WIDE_INT
2234 compute_max_addr_offset (struct iv_use *use)
2236 int width;
2237 rtx reg, addr;
2238 HOST_WIDE_INT i, off;
2239 unsigned list_index, num;
2240 addr_space_t as;
2241 machine_mode mem_mode, addr_mode;
2242 static vec<HOST_WIDE_INT> max_offset_list;
2244 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2245 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2247 num = max_offset_list.length ();
2248 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2249 if (list_index >= num)
2251 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2252 for (; num < max_offset_list.length (); num++)
2253 max_offset_list[num] = -1;
2256 off = max_offset_list[list_index];
2257 if (off != -1)
2258 return off;
2260 addr_mode = targetm.addr_space.address_mode (as);
2261 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2262 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2264 width = GET_MODE_BITSIZE (addr_mode) - 1;
2265 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2266 width = HOST_BITS_PER_WIDE_INT - 1;
2268 for (i = width; i > 0; i--)
2270 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2271 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2272 if (memory_address_addr_space_p (mem_mode, addr, as))
2273 break;
2275 /* For some strict-alignment targets, the offset must be naturally
2276 aligned. Try an aligned offset if mem_mode is not QImode. */
2277 off = ((unsigned HOST_WIDE_INT) 1 << i);
2278 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2280 off -= GET_MODE_SIZE (mem_mode);
2281 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2282 if (memory_address_addr_space_p (mem_mode, addr, as))
2283 break;
2286 if (i == 0)
2287 off = 0;
2289 max_offset_list[list_index] = off;
2290 return off;
2293 /* Check if all small groups should be split. Return true if and
2294 only if:
2296 1) At least one group contains two uses with different offsets.
2297 2) No group contains more than two uses with different offsets.
2299 Return false otherwise. We want to split such groups because:
2301 1) Small groups don't have much benefit and may interfere with
2302 general candidate selection.
2303 2) A problem consisting only of small groups is usually small, and
2304 the general algorithm can handle it well.
2306 TODO -- The above claims may not hold when auto-increment is supported. */
2308 static bool
2309 split_all_small_groups (struct ivopts_data *data)
2311 bool split_p = false;
2312 unsigned int i, n, distinct;
2313 struct iv_use *pre, *use;
2315 n = n_iv_uses (data);
2316 for (i = 0; i < n; i++)
2318 use = iv_use (data, i);
2319 if (!use->next)
2320 continue;
2322 distinct = 1;
2323 gcc_assert (use->type == USE_ADDRESS);
2324 for (pre = use, use = use->next; use; pre = use, use = use->next)
2326 if (pre->addr_offset != use->addr_offset)
2327 distinct++;
2329 if (distinct > 2)
2330 return false;
2332 if (distinct == 2)
2333 split_p = true;
2336 return split_p;
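/* Illustrative sketch (assumed 4-byte elements, not from the original
   sources): a group containing the uses a[i] and a[i + 1] has the two
   distinct offsets {0, 4}, so it qualifies for splitting; if the group
   also contained a[i + 2], it would have three distinct offsets and
   this function would return false, leaving all groups intact.  */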
2339 /* For each group of address type uses, this function further groups
2340 these uses according to the maximum offset supported by the target's
2341 [base + offset] addressing mode. */
2343 static void
2344 group_address_uses (struct ivopts_data *data)
2346 HOST_WIDE_INT max_offset = -1;
2347 unsigned int i, n, sub_id;
2348 struct iv_use *pre, *use;
2349 unsigned HOST_WIDE_INT addr_offset_first;
2351 /* Reset max offset to split all small groups. */
2352 if (split_all_small_groups (data))
2353 max_offset = 0;
2355 n = n_iv_uses (data);
2356 for (i = 0; i < n; i++)
2358 use = iv_use (data, i);
2359 if (!use->next)
2360 continue;
2362 gcc_assert (use->type == USE_ADDRESS);
2363 if (max_offset != 0)
2364 max_offset = compute_max_addr_offset (use);
2366 while (use)
2368 sub_id = 0;
2369 addr_offset_first = use->addr_offset;
2370 /* Only uses whose offset relative to the first use fits in the
2371 offset part can be grouped together. */
2372 for (pre = use, use = use->next;
2373 use && (use->addr_offset - addr_offset_first
2374 <= (unsigned HOST_WIDE_INT) max_offset);
2375 pre = use, use = use->next)
2377 use->id = pre->id;
2378 use->sub_id = ++sub_id;
2381 /* Break the list and create a new group. */
2382 if (use)
2384 pre->next = NULL;
2385 use->id = n_iv_uses (data);
2386 use->related_cands = BITMAP_ALLOC (NULL);
2387 data->iv_uses.safe_push (use);
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 dump_uses (dump_file, data);
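/* Illustrative example (assumed offset limit, not from the original
   sources): if compute_max_addr_offset returns 4095 and a group holds
   uses at offsets {0, 8, 4096}, the uses at 0 and 8 stay together
   (their distance from the first use fits in the offset part), while
   the use at 4096 is split off into a new group with a fresh id.  */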
2396 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2397 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2398 we are at the top-level of the processed address. */
2400 static tree
2401 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2402 HOST_WIDE_INT *offset)
2404 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2405 enum tree_code code;
2406 tree type, orig_type = TREE_TYPE (expr);
2407 HOST_WIDE_INT off0, off1, st;
2408 tree orig_expr = expr;
2410 STRIP_NOPS (expr);
2412 type = TREE_TYPE (expr);
2413 code = TREE_CODE (expr);
2414 *offset = 0;
2416 switch (code)
2418 case INTEGER_CST:
2419 if (!cst_and_fits_in_hwi (expr)
2420 || integer_zerop (expr))
2421 return orig_expr;
2423 *offset = int_cst_value (expr);
2424 return build_int_cst (orig_type, 0);
2426 case POINTER_PLUS_EXPR:
2427 case PLUS_EXPR:
2428 case MINUS_EXPR:
2429 op0 = TREE_OPERAND (expr, 0);
2430 op1 = TREE_OPERAND (expr, 1);
2432 op0 = strip_offset_1 (op0, false, false, &off0);
2433 op1 = strip_offset_1 (op1, false, false, &off1);
2435 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2436 if (op0 == TREE_OPERAND (expr, 0)
2437 && op1 == TREE_OPERAND (expr, 1))
2438 return orig_expr;
2440 if (integer_zerop (op1))
2441 expr = op0;
2442 else if (integer_zerop (op0))
2444 if (code == MINUS_EXPR)
2445 expr = fold_build1 (NEGATE_EXPR, type, op1);
2446 else
2447 expr = op1;
2449 else
2450 expr = fold_build2 (code, type, op0, op1);
2452 return fold_convert (orig_type, expr);
2454 case MULT_EXPR:
2455 op1 = TREE_OPERAND (expr, 1);
2456 if (!cst_and_fits_in_hwi (op1))
2457 return orig_expr;
2459 op0 = TREE_OPERAND (expr, 0);
2460 op0 = strip_offset_1 (op0, false, false, &off0);
2461 if (op0 == TREE_OPERAND (expr, 0))
2462 return orig_expr;
2464 *offset = off0 * int_cst_value (op1);
2465 if (integer_zerop (op0))
2466 expr = op0;
2467 else
2468 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2470 return fold_convert (orig_type, expr);
2472 case ARRAY_REF:
2473 case ARRAY_RANGE_REF:
2474 if (!inside_addr)
2475 return orig_expr;
2477 step = array_ref_element_size (expr);
2478 if (!cst_and_fits_in_hwi (step))
2479 break;
2481 st = int_cst_value (step);
2482 op1 = TREE_OPERAND (expr, 1);
2483 op1 = strip_offset_1 (op1, false, false, &off1);
2484 *offset = off1 * st;
2486 if (top_compref
2487 && integer_zerop (op1))
2489 /* Strip the component reference completely. */
2490 op0 = TREE_OPERAND (expr, 0);
2491 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2492 *offset += off0;
2493 return op0;
2495 break;
2497 case COMPONENT_REF:
2499 tree field;
2501 if (!inside_addr)
2502 return orig_expr;
2504 tmp = component_ref_field_offset (expr);
2505 field = TREE_OPERAND (expr, 1);
2506 if (top_compref
2507 && cst_and_fits_in_hwi (tmp)
2508 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2510 HOST_WIDE_INT boffset, abs_off;
2512 /* Strip the component reference completely. */
2513 op0 = TREE_OPERAND (expr, 0);
2514 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2515 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2516 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2517 if (boffset < 0)
2518 abs_off = -abs_off;
2520 *offset = off0 + int_cst_value (tmp) + abs_off;
2521 return op0;
2524 break;
2526 case ADDR_EXPR:
2527 op0 = TREE_OPERAND (expr, 0);
2528 op0 = strip_offset_1 (op0, true, true, &off0);
2529 *offset += off0;
2531 if (op0 == TREE_OPERAND (expr, 0))
2532 return orig_expr;
2534 expr = build_fold_addr_expr (op0);
2535 return fold_convert (orig_type, expr);
2537 case MEM_REF:
2538 /* ??? Offset operand? */
2539 inside_addr = false;
2540 break;
2542 default:
2543 return orig_expr;
2546 /* Default handling of expressions for which we want to recurse into
2547 the first operand. */
2548 op0 = TREE_OPERAND (expr, 0);
2549 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2550 *offset += off0;
2552 if (op0 == TREE_OPERAND (expr, 0)
2553 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2554 return orig_expr;
2556 expr = copy_node (expr);
2557 TREE_OPERAND (expr, 0) = op0;
2558 if (op1)
2559 TREE_OPERAND (expr, 1) = op1;
2561 /* Inside an address, we might strip the top-level component references,
2562 thus changing the type of the expression. Handling of ADDR_EXPR
2563 will fix that. */
2564 expr = fold_convert (orig_type, expr);
2566 return expr;
2569 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2571 static tree
2572 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2574 HOST_WIDE_INT off;
2575 tree core = strip_offset_1 (expr, false, false, &off);
2576 *offset = off;
2577 return core;
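/* Illustrative examples (hypothetical expressions): for &a[i + 5] with
   4-byte elements, strip_offset returns &a[i] and sets *OFFSET to 20;
   for the pointer expression p + 16 it returns p and sets *OFFSET
   to 16.  */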
2580 /* Returns a variant of TYPE that can be used as a base for different uses.
2581 We return an unsigned type with the same precision, which avoids problems
2582 with overflows. */
2584 static tree
2585 generic_type_for (tree type)
2587 if (POINTER_TYPE_P (type))
2588 return unsigned_type_for (type);
2590 if (TYPE_UNSIGNED (type))
2591 return type;
2593 return unsigned_type_for (type);
2596 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2597 the bitmap in which we should store them. */
2599 static struct ivopts_data *fd_ivopts_data;
2600 static tree
2601 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2603 bitmap *depends_on = (bitmap *) data;
2604 struct version_info *info;
2606 if (TREE_CODE (*expr_p) != SSA_NAME)
2607 return NULL_TREE;
2608 info = name_info (fd_ivopts_data, *expr_p);
2610 if (!info->inv_id || info->has_nonlin_use)
2611 return NULL_TREE;
2613 if (!*depends_on)
2614 *depends_on = BITMAP_ALLOC (NULL);
2615 bitmap_set_bit (*depends_on, info->inv_id);
2617 return NULL_TREE;
2620 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2621 position to POS. If USE is not NULL, the candidate is set as related to
2622 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2623 replacement of the final value of the iv by a direct computation. */
2625 static struct iv_cand *
2626 add_candidate_1 (struct ivopts_data *data,
2627 tree base, tree step, bool important, enum iv_position pos,
2628 struct iv_use *use, gimple incremented_at)
2630 unsigned i;
2631 struct iv_cand *cand = NULL;
2632 tree type, orig_type;
2634 /* For non-original variables, make sure their values are computed in a type
2635 that does not invoke undefined behavior on overflows (since in general,
2636 we cannot prove that these induction variables are non-wrapping). */
2637 if (pos != IP_ORIGINAL)
2639 orig_type = TREE_TYPE (base);
2640 type = generic_type_for (orig_type);
2641 if (type != orig_type)
2643 base = fold_convert (type, base);
2644 step = fold_convert (type, step);
2648 for (i = 0; i < n_iv_cands (data); i++)
2650 cand = iv_cand (data, i);
2652 if (cand->pos != pos)
2653 continue;
2655 if (cand->incremented_at != incremented_at
2656 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2657 && cand->ainc_use != use))
2658 continue;
2660 if (!cand->iv)
2662 if (!base && !step)
2663 break;
2665 continue;
2668 if (!base && !step)
2669 continue;
2671 if (operand_equal_p (base, cand->iv->base, 0)
2672 && operand_equal_p (step, cand->iv->step, 0)
2673 && (TYPE_PRECISION (TREE_TYPE (base))
2674 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2675 break;
2678 if (i == n_iv_cands (data))
2680 cand = XCNEW (struct iv_cand);
2681 cand->id = i;
2683 if (!base && !step)
2684 cand->iv = NULL;
2685 else
2686 cand->iv = alloc_iv (base, step);
2688 cand->pos = pos;
2689 if (pos != IP_ORIGINAL && cand->iv)
2691 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2692 cand->var_after = cand->var_before;
2694 cand->important = important;
2695 cand->incremented_at = incremented_at;
2696 data->iv_candidates.safe_push (cand);
2698 if (step
2699 && TREE_CODE (step) != INTEGER_CST)
2701 fd_ivopts_data = data;
2702 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2705 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2706 cand->ainc_use = use;
2707 else
2708 cand->ainc_use = NULL;
2710 if (dump_file && (dump_flags & TDF_DETAILS))
2711 dump_cand (dump_file, cand);
2714 if (important && !cand->important)
2716 cand->important = true;
2717 if (dump_file && (dump_flags & TDF_DETAILS))
2718 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2721 if (use)
2723 bitmap_set_bit (use->related_cands, i);
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fprintf (dump_file, "Candidate %d is related to use %d\n",
2726 cand->id, use->id);
2729 return cand;
2732 /* Returns true if incrementing the induction variable at the end of the LOOP
2733 is allowed.
2735 The purpose is to avoid splitting latch edge with a biv increment, thus
2736 creating a jump, possibly confusing other optimization passes and leaving
2737 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2738 is not available (so we do not have a better alternative), or if the latch
2739 edge is already nonempty. */
2741 static bool
2742 allow_ip_end_pos_p (struct loop *loop)
2744 if (!ip_normal_pos (loop))
2745 return true;
2747 if (!empty_block_p (ip_end_pos (loop)))
2748 return true;
2750 return false;
2753 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2754 Important field is set to IMPORTANT. */
2756 static void
2757 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2758 bool important, struct iv_use *use)
2760 basic_block use_bb = gimple_bb (use->stmt);
2761 machine_mode mem_mode;
2762 unsigned HOST_WIDE_INT cstepi;
2764 /* If we insert the increment in any position other than the standard
2765 ones, we must ensure that it is incremented once per iteration.
2766 It must not be in an inner nested loop, or one side of an if
2767 statement. */
2768 if (use_bb->loop_father != data->current_loop
2769 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2770 || stmt_could_throw_p (use->stmt)
2771 || !cst_and_fits_in_hwi (step))
2772 return;
2774 cstepi = int_cst_value (step);
2776 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2777 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2778 || USE_STORE_PRE_INCREMENT (mem_mode))
2779 && GET_MODE_SIZE (mem_mode) == cstepi)
2780 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2781 || USE_STORE_PRE_DECREMENT (mem_mode))
2782 && GET_MODE_SIZE (mem_mode) == -cstepi))
2784 enum tree_code code = MINUS_EXPR;
2785 tree new_base;
2786 tree new_step = step;
2788 if (POINTER_TYPE_P (TREE_TYPE (base)))
2790 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2791 code = POINTER_PLUS_EXPR;
2793 else
2794 new_step = fold_convert (TREE_TYPE (base), new_step);
2795 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2796 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2797 use->stmt);
2799 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2800 || USE_STORE_POST_INCREMENT (mem_mode))
2801 && GET_MODE_SIZE (mem_mode) == cstepi)
2802 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2803 || USE_STORE_POST_DECREMENT (mem_mode))
2804 && GET_MODE_SIZE (mem_mode) == -cstepi))
2806 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2807 use->stmt);
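/* Illustrative example (target-dependent, assumed): on a target with
   post-increment addressing, the load in

     for (; p < end; p++)
       sum += *p;

   advances p by exactly the access size, so an IP_AFTER_USE candidate
   is added above, allowing the increment to be folded into the memory
   access as a *p++ style operation.  */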
2811 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2812 position to POS. If USE is not NULL, the candidate is set as related to
2813 it. The candidate computation is scheduled on all available positions. */
2815 static void
2816 add_candidate (struct ivopts_data *data,
2817 tree base, tree step, bool important, struct iv_use *use)
2819 gcc_assert (use == NULL || use->sub_id == 0);
2821 if (ip_normal_pos (data->current_loop))
2822 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2823 if (ip_end_pos (data->current_loop)
2824 && allow_ip_end_pos_p (data->current_loop))
2825 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2827 if (use != NULL && use->type == USE_ADDRESS)
2828 add_autoinc_candidates (data, base, step, important, use);
2831 /* Adds standard iv candidates. */
2833 static void
2834 add_standard_iv_candidates (struct ivopts_data *data)
2836 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2838 /* The same for a double-integer type if it is still fast enough. */
2839 if (TYPE_PRECISION
2840 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2841 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2842 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2843 build_int_cst (long_integer_type_node, 1), true, NULL);
2845 /* The same for a double-integer type if it is still fast enough. */
2846 if (TYPE_PRECISION
2847 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2848 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2849 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2850 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2854 /* Adds candidates based on the old induction variable IV. */
2856 static void
2857 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2859 gimple phi;
2860 tree def;
2861 struct iv_cand *cand;
2863 add_candidate (data, iv->base, iv->step, true, NULL);
2865 /* The same, but with initial value zero. */
2866 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2867 add_candidate (data, size_int (0), iv->step, true, NULL);
2868 else
2869 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2870 iv->step, true, NULL);
2872 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2873 if (gimple_code (phi) == GIMPLE_PHI)
2875 /* Additionally record the possibility of leaving the original iv
2876 untouched. */
2877 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2878 /* Don't add candidate if it's from another PHI node because
2879 it's an affine iv appearing in the form of PEELED_CHREC. */
2880 phi = SSA_NAME_DEF_STMT (def);
2881 if (gimple_code (phi) != GIMPLE_PHI)
2883 cand = add_candidate_1 (data,
2884 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2885 SSA_NAME_DEF_STMT (def));
2886 cand->var_before = iv->ssa_name;
2887 cand->var_after = def;
2889 else
2890 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2894 /* Adds candidates based on the old induction variables. */
2896 static void
2897 add_old_ivs_candidates (struct ivopts_data *data)
2899 unsigned i;
2900 struct iv *iv;
2901 bitmap_iterator bi;
2903 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2905 iv = ver_info (data, i)->iv;
2906 if (iv && iv->biv_p && !integer_zerop (iv->step))
2907 add_old_iv_candidates (data, iv);
2911 /* Adds candidates based on the value of the induction variable IV and USE. */
2913 static void
2914 add_iv_value_candidates (struct ivopts_data *data,
2915 struct iv *iv, struct iv_use *use)
2917 unsigned HOST_WIDE_INT offset;
2918 tree base;
2919 tree basetype;
2921 add_candidate (data, iv->base, iv->step, false, use);
2923 /* The same, but with initial value zero. Make such a variable important,
2924 since it is generic enough so that possibly many uses may be based
2925 on it. */
2926 basetype = TREE_TYPE (iv->base);
2927 if (POINTER_TYPE_P (basetype))
2928 basetype = sizetype;
2929 add_candidate (data, build_int_cst (basetype, 0),
2930 iv->step, true, use);
2932 /* Third, try removing the constant offset. Make sure to even
2933 add a candidate for &a[0] vs. (T *)&a. */
2934 base = strip_offset (iv->base, &offset);
2935 if (offset
2936 || base != iv->base)
2937 add_candidate (data, base, iv->step, false, use);
2940 /* Adds candidates based on the uses. */
2942 static void
2943 add_derived_ivs_candidates (struct ivopts_data *data)
2945 unsigned i;
2947 for (i = 0; i < n_iv_uses (data); i++)
2949 struct iv_use *use = iv_use (data, i);
2951 if (!use)
2952 continue;
2954 switch (use->type)
2956 case USE_NONLINEAR_EXPR:
2957 case USE_COMPARE:
2958 case USE_ADDRESS:
2959 /* Just add the ivs based on the value of the iv used here. */
2960 add_iv_value_candidates (data, use->iv, use);
2961 break;
2963 default:
2964 gcc_unreachable ();
2969 /* Record important candidates and add them to related_cands bitmaps
2970 if needed. */
2972 static void
2973 record_important_candidates (struct ivopts_data *data)
2975 unsigned i;
2976 struct iv_use *use;
2978 for (i = 0; i < n_iv_cands (data); i++)
2980 struct iv_cand *cand = iv_cand (data, i);
2982 if (cand->important)
2983 bitmap_set_bit (data->important_candidates, i);
2986 data->consider_all_candidates = (n_iv_cands (data)
2987 <= CONSIDER_ALL_CANDIDATES_BOUND);
2989 if (data->consider_all_candidates)
2991 /* We will not need "related_cands" bitmaps in this case,
2992 so release them to decrease peak memory consumption. */
2993 for (i = 0; i < n_iv_uses (data); i++)
2995 use = iv_use (data, i);
2996 BITMAP_FREE (use->related_cands);
2999 else
3001 /* Add important candidates to the related_cands bitmaps. */
3002 for (i = 0; i < n_iv_uses (data); i++)
3003 bitmap_ior_into (iv_use (data, i)->related_cands,
3004 data->important_candidates);
3008 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3009 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3010 we allocate a simple list to every use. */
3012 static void
3013 alloc_use_cost_map (struct ivopts_data *data)
3015 unsigned i, size, s;
3017 for (i = 0; i < n_iv_uses (data); i++)
3019 struct iv_use *use = iv_use (data, i);
3021 if (data->consider_all_candidates)
3022 size = n_iv_cands (data);
3023 else
3025 s = bitmap_count_bits (use->related_cands);
3027 /* Round up to the nearest power of two, so that computing modulo is fast. */
3028 size = s ? (1 << ceil_log2 (s)) : 1;
3031 use->n_map_members = size;
3032 use->cost_map = XCNEWVEC (struct cost_pair, size);
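/* Worked example (assumed counts): with 5 related candidates,
   ceil_log2 (5) == 3, so the cost map gets 8 entries; cand->id & 7
   then serves as the hash probe start in set_use_iv_cost and
   get_use_iv_cost below.  */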
3036 /* Returns a description of the computation cost of an expression whose
3037 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3039 static comp_cost
3040 new_cost (unsigned runtime, unsigned complexity)
3042 comp_cost cost;
3044 cost.cost = runtime;
3045 cost.complexity = complexity;
3047 return cost;
3050 /* Returns true if COST is infinite. */
3052 static bool
3053 infinite_cost_p (comp_cost cost)
3055 return cost.cost == INFTY;
3058 /* Adds costs COST1 and COST2. */
3060 static comp_cost
3061 add_costs (comp_cost cost1, comp_cost cost2)
3063 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3064 return infinite_cost;
3066 cost1.cost += cost2.cost;
3067 cost1.complexity += cost2.complexity;
3069 return cost1;
3071 /* Subtracts cost COST2 from COST1. */
3073 static comp_cost
3074 sub_costs (comp_cost cost1, comp_cost cost2)
3076 cost1.cost -= cost2.cost;
3077 cost1.complexity -= cost2.complexity;
3079 return cost1;
3082 /* Returns a negative number if COST1 < COST2, a positive number if
3083 COST1 > COST2, and 0 if COST1 = COST2. */
3085 static int
3086 compare_costs (comp_cost cost1, comp_cost cost2)
3088 if (cost1.cost == cost2.cost)
3089 return cost1.complexity - cost2.complexity;
3091 return cost1.cost - cost2.cost;
3094 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3095 depends on invariants DEPENDS_ON and that the value used in expressing it
3096 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3098 static void
3099 set_use_iv_cost (struct ivopts_data *data,
3100 struct iv_use *use, struct iv_cand *cand,
3101 comp_cost cost, bitmap depends_on, tree value,
3102 enum tree_code comp, int inv_expr_id)
3104 unsigned i, s;
3106 if (infinite_cost_p (cost))
3108 BITMAP_FREE (depends_on);
3109 return;
3112 if (data->consider_all_candidates)
3114 use->cost_map[cand->id].cand = cand;
3115 use->cost_map[cand->id].cost = cost;
3116 use->cost_map[cand->id].depends_on = depends_on;
3117 use->cost_map[cand->id].value = value;
3118 use->cost_map[cand->id].comp = comp;
3119 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3120 return;
3123 /* n_map_members is a power of two, so this computes modulo. */
3124 s = cand->id & (use->n_map_members - 1);
3125 for (i = s; i < use->n_map_members; i++)
3126 if (!use->cost_map[i].cand)
3127 goto found;
3128 for (i = 0; i < s; i++)
3129 if (!use->cost_map[i].cand)
3130 goto found;
3132 gcc_unreachable ();
3134 found:
3135 use->cost_map[i].cand = cand;
3136 use->cost_map[i].cost = cost;
3137 use->cost_map[i].depends_on = depends_on;
3138 use->cost_map[i].value = value;
3139 use->cost_map[i].comp = comp;
3140 use->cost_map[i].inv_expr_id = inv_expr_id;
3143 /* Gets cost of (USE, CANDIDATE) pair. */
3145 static struct cost_pair *
3146 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3147 struct iv_cand *cand)
3149 unsigned i, s;
3150 struct cost_pair *ret;
3152 if (!cand)
3153 return NULL;
3155 if (data->consider_all_candidates)
3157 ret = use->cost_map + cand->id;
3158 if (!ret->cand)
3159 return NULL;
3161 return ret;
3164 /* n_map_members is a power of two, so this computes modulo. */
3165 s = cand->id & (use->n_map_members - 1);
3166 for (i = s; i < use->n_map_members; i++)
3167 if (use->cost_map[i].cand == cand)
3168 return use->cost_map + i;
3169 else if (use->cost_map[i].cand == NULL)
3170 return NULL;
3171 for (i = 0; i < s; i++)
3172 if (use->cost_map[i].cand == cand)
3173 return use->cost_map + i;
3174 else if (use->cost_map[i].cand == NULL)
3175 return NULL;
3177 return NULL;
3180 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3181 static rtx
3182 produce_memory_decl_rtl (tree obj, int *regno)
3184 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3185 machine_mode address_mode = targetm.addr_space.address_mode (as);
3186 rtx x;
3188 gcc_assert (obj);
3189 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3191 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3192 x = gen_rtx_SYMBOL_REF (address_mode, name);
3193 SET_SYMBOL_REF_DECL (x, obj);
3194 x = gen_rtx_MEM (DECL_MODE (obj), x);
3195 set_mem_addr_space (x, as);
3196 targetm.encode_section_info (obj, x, true);
3198 else
3200 x = gen_raw_REG (address_mode, (*regno)++);
3201 x = gen_rtx_MEM (DECL_MODE (obj), x);
3202 set_mem_addr_space (x, as);
3205 return x;
3208 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3209 walk_tree. DATA contains the actual fake register number. */
3211 static tree
3212 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3214 tree obj = NULL_TREE;
3215 rtx x = NULL_RTX;
3216 int *regno = (int *) data;
3218 switch (TREE_CODE (*expr_p))
3220 case ADDR_EXPR:
3221 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3222 handled_component_p (*expr_p);
3223 expr_p = &TREE_OPERAND (*expr_p, 0))
3224 continue;
3225 obj = *expr_p;
3226 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3227 x = produce_memory_decl_rtl (obj, regno);
3228 break;
3230 case SSA_NAME:
3231 *ws = 0;
3232 obj = SSA_NAME_VAR (*expr_p);
3233 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3234 if (!obj)
3235 return NULL_TREE;
3236 if (!DECL_RTL_SET_P (obj))
3237 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3238 break;
3240 case VAR_DECL:
3241 case PARM_DECL:
3242 case RESULT_DECL:
3243 *ws = 0;
3244 obj = *expr_p;
3246 if (DECL_RTL_SET_P (obj))
3247 break;
3249 if (DECL_MODE (obj) == BLKmode)
3250 x = produce_memory_decl_rtl (obj, regno);
3251 else
3252 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3254 break;
3256 default:
3257 break;
3260 if (x)
3262 decl_rtl_to_reset.safe_push (obj);
3263 SET_DECL_RTL (obj, x);
3266 return NULL_TREE;
3269 /* Determines cost of the computation of EXPR. */
3271 static unsigned
3272 computation_cost (tree expr, bool speed)
3274 rtx_insn *seq;
3275 rtx rslt;
3276 tree type = TREE_TYPE (expr);
3277 unsigned cost;
3278 /* Avoid using hard regs in ways which may be unsupported. */
3279 int regno = LAST_VIRTUAL_REGISTER + 1;
3280 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3281 enum node_frequency real_frequency = node->frequency;
3283 node->frequency = NODE_FREQUENCY_NORMAL;
3284 crtl->maybe_hot_insn_p = speed;
3285 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3286 start_sequence ();
3287 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3288 seq = get_insns ();
3289 end_sequence ();
3290 default_rtl_profile ();
3291 node->frequency = real_frequency;
3293 cost = seq_cost (seq, speed);
3294 if (MEM_P (rslt))
3295 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3296 TYPE_ADDR_SPACE (type), speed);
3297 else if (!REG_P (rslt))
3298 cost += set_src_cost (rslt, speed);
3300 return cost;
3303 /* Returns variable containing the value of candidate CAND at statement AT. */
3305 static tree
3306 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3308 if (stmt_after_increment (loop, cand, stmt))
3309 return cand->var_after;
3310 else
3311 return cand->var_before;
3314 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3315 same precision that is at least as wide as the precision of TYPE, stores
3316 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3317 type of A and B. */
3319 static tree
3320 determine_common_wider_type (tree *a, tree *b)
3322 tree wider_type = NULL;
3323 tree suba, subb;
3324 tree atype = TREE_TYPE (*a);
3326 if (CONVERT_EXPR_P (*a))
3328 suba = TREE_OPERAND (*a, 0);
3329 wider_type = TREE_TYPE (suba);
3330 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3331 return atype;
3333 else
3334 return atype;
3336 if (CONVERT_EXPR_P (*b))
3338 subb = TREE_OPERAND (*b, 0);
3339 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3340 return atype;
3342 else
3343 return atype;
3345 *a = suba;
3346 *b = subb;
3347 return wider_type;
3350 /* Determines the expression by which USE is expressed from induction variable
3351 CAND at statement AT in LOOP. The expression is stored in a decomposed
3352 form into AFF. Returns false if USE cannot be expressed using CAND. */
3354 static bool
3355 get_computation_aff (struct loop *loop,
3356 struct iv_use *use, struct iv_cand *cand, gimple at,
3357 struct aff_tree *aff)
3359 tree ubase = use->iv->base;
3360 tree ustep = use->iv->step;
3361 tree cbase = cand->iv->base;
3362 tree cstep = cand->iv->step, cstep_common;
3363 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3364 tree common_type, var;
3365 tree uutype;
3366 aff_tree cbase_aff, var_aff;
3367 widest_int rat;
3369 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3371 /* We do not have a precision to express the values of the use. */
3372 return false;
3375 var = var_at_stmt (loop, cand, at);
3376 uutype = unsigned_type_for (utype);
3378 /* If the conversion is not noop, perform it. */
3379 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3381 cstep = fold_convert (uutype, cstep);
3382 cbase = fold_convert (uutype, cbase);
3383 var = fold_convert (uutype, var);
3386 if (!constant_multiple_of (ustep, cstep, &rat))
3387 return false;
3389 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3390 type, we achieve better folding by computing their difference in this
3391 wider type and casting the result to UUTYPE. We do not need to worry about
3392 overflows, as all the arithmetic will in the end be performed in UUTYPE
3393 anyway. */
3394 common_type = determine_common_wider_type (&ubase, &cbase);
3396 /* use = ubase - ratio * cbase + ratio * var. */
3397 tree_to_aff_combination (ubase, common_type, aff);
3398 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3399 tree_to_aff_combination (var, uutype, &var_aff);
3401 /* We need to shift the value if we are after the increment. */
3402 if (stmt_after_increment (loop, cand, at))
3404 aff_tree cstep_aff;
3406 if (common_type != uutype)
3407 cstep_common = fold_convert (common_type, cstep);
3408 else
3409 cstep_common = cstep;
3411 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3412 aff_combination_add (&cbase_aff, &cstep_aff);
3415 aff_combination_scale (&cbase_aff, -rat);
3416 aff_combination_add (aff, &cbase_aff);
3417 if (common_type != uutype)
3418 aff_combination_convert (aff, uutype);
3420 aff_combination_scale (&var_aff, rat);
3421 aff_combination_add (aff, &var_aff);
3423 return true;
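/* Illustrative example (assumed ivs): for a use with iv
   <base &a[0], step 4> and a candidate with iv <base 0, step 1>,
   constant_multiple_of yields rat == 4, so the affine form is

     use = &a[0] - 4 * 0 + 4 * var = &a[0] + 4 * var

   with var being the candidate's value at the statement.  */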
3426 /* Return the type of USE. */
3428 static tree
3429 get_use_type (struct iv_use *use)
3431 tree base_type = TREE_TYPE (use->iv->base);
3432 tree type;
3434 if (use->type == USE_ADDRESS)
3436 /* The base_type may be a void pointer. Create a pointer type based on
3437 the mem_ref instead. */
3438 type = build_pointer_type (TREE_TYPE (*use->op_p));
3439 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3440 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3442 else
3443 type = base_type;
3445 return type;
3448 /* Determines the expression by which USE is expressed from induction variable
3449 CAND at statement AT in LOOP. The computation is unshared. */
3451 static tree
3452 get_computation_at (struct loop *loop,
3453 struct iv_use *use, struct iv_cand *cand, gimple at)
3455 aff_tree aff;
3456 tree type = get_use_type (use);
3458 if (!get_computation_aff (loop, use, cand, at, &aff))
3459 return NULL_TREE;
3460 unshare_aff_combination (&aff);
3461 return fold_convert (type, aff_combination_to_tree (&aff));
3464 /* Determines the expression by which USE is expressed from induction variable
3465 CAND in LOOP. The computation is unshared. */
3467 static tree
3468 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3470 return get_computation_at (loop, use, cand, use->stmt);
3473 /* Adjust the cost COST for being in loop setup rather than loop body.
3474 If we're optimizing for space, the loop setup overhead is constant;
3475 if we're optimizing for speed, amortize it over the per-iteration cost. */
3476 static unsigned
3477 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3479 if (cost == INFTY)
3480 return cost;
3481 else if (optimize_loop_for_speed_p (data->current_loop))
3482 return cost / avg_loop_niter (data->current_loop);
3483 else
3484 return cost;
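/* Worked example: a setup cost of 10 in a loop iterating 5 times on
   average is charged as 10 / 5 == 2 when optimizing for speed, and as
   the full 10 when optimizing for size.  */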
3487 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3488 validity for a memory reference accessing memory of mode MODE in
3489 address space AS. */
3492 bool
3493 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3494 addr_space_t as)
3496 #define MAX_RATIO 128
3497 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3498 static vec<sbitmap> valid_mult_list;
3499 sbitmap valid_mult;
3501 if (data_index >= valid_mult_list.length ())
3502 valid_mult_list.safe_grow_cleared (data_index + 1);
3504 valid_mult = valid_mult_list[data_index];
3505 if (!valid_mult)
3507 machine_mode address_mode = targetm.addr_space.address_mode (as);
3508 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3509 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3510 rtx addr, scaled;
3511 HOST_WIDE_INT i;
3513 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3514 bitmap_clear (valid_mult);
3515 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3516 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3517 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3519 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3520 if (memory_address_addr_space_p (mode, addr, as)
3521 || memory_address_addr_space_p (mode, scaled, as))
3522 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3525 if (dump_file && (dump_flags & TDF_DETAILS))
3527 fprintf (dump_file, " allowed multipliers:");
3528 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3529 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3530 fprintf (dump_file, " %d", (int) i);
3531 fprintf (dump_file, "\n");
3532 fprintf (dump_file, "\n");
3535 valid_mult_list[data_index] = valid_mult;
3538 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3539 return false;
3541 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
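/* Illustrative result (target-dependent): on x86-64 the probing above
   typically leaves {1, 2, 4, 8} as the allowed multipliers, matching
   the scale factors of the hardware's indexed addressing modes.  */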
3544 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3545 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3546 variable is omitted. Compute the cost for a memory reference that accesses
3547 a memory location of mode MEM_MODE in address space AS.
3549 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3550 size of MEM_MODE / RATIO) is available. To make this determination, we
3551 look at the size of the increment to be made, which is given in CSTEP.
3552 CSTEP may be zero if the step is unknown.
3553 STMT_AFTER_INC is true iff the statement we're looking at is after the
3554 increment of the original biv.
3556 TODO -- there must be some better way. This all is quite crude. */
3558 enum ainc_type
3560 AINC_PRE_INC, /* Pre increment. */
3561 AINC_PRE_DEC, /* Pre decrement. */
3562 AINC_POST_INC, /* Post increment. */
3563 AINC_POST_DEC, /* Post decrement. */
3564 AINC_NONE /* Also the number of auto increment types. */
3567 typedef struct address_cost_data_s
3569 HOST_WIDE_INT min_offset, max_offset;
3570 unsigned costs[2][2][2][2];
3571 unsigned ainc_costs[AINC_NONE];
3572 } *address_cost_data;
3575 static comp_cost
3576 get_address_cost (bool symbol_present, bool var_present,
3577 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3578 HOST_WIDE_INT cstep, machine_mode mem_mode,
3579 addr_space_t as, bool speed,
3580 bool stmt_after_inc, bool *may_autoinc)
3582 machine_mode address_mode = targetm.addr_space.address_mode (as);
3583 static vec<address_cost_data> address_cost_data_list;
3584 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3585 address_cost_data data;
3586 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3587 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3588 unsigned cost, acost, complexity;
3589 enum ainc_type autoinc_type;
3590 bool offset_p, ratio_p, autoinc;
3591 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3592 unsigned HOST_WIDE_INT mask;
3593 unsigned bits;
3595 if (data_index >= address_cost_data_list.length ())
3596 address_cost_data_list.safe_grow_cleared (data_index + 1);
3598 data = address_cost_data_list[data_index];
3599 if (!data)
3601 HOST_WIDE_INT i;
3602 HOST_WIDE_INT rat, off = 0;
3603 int old_cse_not_expected, width;
3604 unsigned sym_p, var_p, off_p, rat_p, add_c;
3605 rtx_insn *seq;
3606 rtx addr, base;
3607 rtx reg0, reg1;
3609 data = (address_cost_data) xcalloc (1, sizeof (*data));
3611 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3613 width = GET_MODE_BITSIZE (address_mode) - 1;
3614 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3615 width = HOST_BITS_PER_WIDE_INT - 1;
3616 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3618 for (i = width; i >= 0; i--)
3620 off = -((unsigned HOST_WIDE_INT) 1 << i);
3621 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622 if (memory_address_addr_space_p (mem_mode, addr, as))
3623 break;
3625 data->min_offset = (i == -1? 0 : off);
3627 for (i = width; i >= 0; i--)
3629 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3630 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3631 if (memory_address_addr_space_p (mem_mode, addr, as))
3632 break;
3633 /* For some strict-alignment targets, the offset must be naturally
3634 aligned. Try an aligned offset if mem_mode is not QImode. */
3635 off = mem_mode != QImode
3636 ? ((unsigned HOST_WIDE_INT) 1 << i)
3637 - GET_MODE_SIZE (mem_mode)
3638 : 0;
3639 if (off > 0)
3641 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3642 if (memory_address_addr_space_p (mem_mode, addr, as))
3643 break;
3646 if (i == -1)
3647 off = 0;
3648 data->max_offset = off;
3650 if (dump_file && (dump_flags & TDF_DETAILS))
3652 fprintf (dump_file, "get_address_cost:\n");
3653 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3654 GET_MODE_NAME (mem_mode),
3655 data->min_offset);
3656 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3657 GET_MODE_NAME (mem_mode),
3658 data->max_offset);
3661 rat = 1;
3662 for (i = 2; i <= MAX_RATIO; i++)
3663 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3665 rat = i;
3666 break;
3669 /* Compute the cost of various addressing modes. */
3670 acost = 0;
3671 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3672 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3674 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3675 || USE_STORE_PRE_DECREMENT (mem_mode))
3677 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3678 has_predec[mem_mode]
3679 = memory_address_addr_space_p (mem_mode, addr, as);
3681 if (has_predec[mem_mode])
3682 data->ainc_costs[AINC_PRE_DEC]
3683 = address_cost (addr, mem_mode, as, speed);
3685 if (USE_LOAD_POST_DECREMENT (mem_mode)
3686 || USE_STORE_POST_DECREMENT (mem_mode))
3688 addr = gen_rtx_POST_DEC (address_mode, reg0);
3689 has_postdec[mem_mode]
3690 = memory_address_addr_space_p (mem_mode, addr, as);
3692 if (has_postdec[mem_mode])
3693 data->ainc_costs[AINC_POST_DEC]
3694 = address_cost (addr, mem_mode, as, speed);
3696 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3697 || USE_STORE_PRE_INCREMENT (mem_mode))
3699 addr = gen_rtx_PRE_INC (address_mode, reg0);
3700 has_preinc[mem_mode]
3701 = memory_address_addr_space_p (mem_mode, addr, as);
3703 if (has_preinc[mem_mode])
3704 data->ainc_costs[AINC_PRE_INC]
3705 = address_cost (addr, mem_mode, as, speed);
3707 if (USE_LOAD_POST_INCREMENT (mem_mode)
3708 || USE_STORE_POST_INCREMENT (mem_mode))
3710 addr = gen_rtx_POST_INC (address_mode, reg0);
3711 has_postinc[mem_mode]
3712 = memory_address_addr_space_p (mem_mode, addr, as);
3714 if (has_postinc[mem_mode])
3715 data->ainc_costs[AINC_POST_INC]
3716 = address_cost (addr, mem_mode, as, speed);
3718 for (i = 0; i < 16; i++)
3720 sym_p = i & 1;
3721 var_p = (i >> 1) & 1;
3722 off_p = (i >> 2) & 1;
3723 rat_p = (i >> 3) & 1;
3725 addr = reg0;
3726 if (rat_p)
3727 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3728 gen_int_mode (rat, address_mode));
3730 if (var_p)
3731 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3733 if (sym_p)
3735 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3736 /* ??? We can run into trouble with some backends by presenting
3737 it with symbols which haven't been properly passed through
3738 targetm.encode_section_info. By setting the local bit, we
3739 enhance the probability of things working. */
3740 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3742 if (off_p)
3743 base = gen_rtx_fmt_e (CONST, address_mode,
3744 gen_rtx_fmt_ee
3745 (PLUS, address_mode, base,
3746 gen_int_mode (off, address_mode)));
3748 else if (off_p)
3749 base = gen_int_mode (off, address_mode);
3750 else
3751 base = NULL_RTX;
3753 if (base)
3754 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3756 start_sequence ();
3757 /* To avoid splitting addressing modes, pretend that no CSE will
3758 follow. */
3759 old_cse_not_expected = cse_not_expected;
3760 cse_not_expected = true;
3761 addr = memory_address_addr_space (mem_mode, addr, as);
3762 cse_not_expected = old_cse_not_expected;
3763 seq = get_insns ();
3764 end_sequence ();
3766 acost = seq_cost (seq, speed);
3767 acost += address_cost (addr, mem_mode, as, speed);
3769 if (!acost)
3770 acost = 1;
3771 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3774 /* On some targets, it is quite expensive to load a symbol into a register,
3775 which makes addresses that contain symbols look much more expensive.
3776 However, the symbol will have to be loaded in any case before the
3777 loop (and quite likely we have it in register already), so it does not
3778 make much sense to penalize them too heavily. So make some final
3779 tweaks for the SYMBOL_PRESENT modes:
3781 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3782 var is cheaper, use this mode with small penalty.
3783 If VAR_PRESENT is true, try whether the mode with
3784 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3785 if this is the case, use it. */
3786 add_c = add_cost (speed, address_mode);
3787 for (i = 0; i < 8; i++)
3789 var_p = i & 1;
3790 off_p = (i >> 1) & 1;
3791 rat_p = (i >> 2) & 1;
3793 acost = data->costs[0][1][off_p][rat_p] + 1;
3794 if (var_p)
3795 acost += add_c;
3797 if (acost < data->costs[1][var_p][off_p][rat_p])
3798 data->costs[1][var_p][off_p][rat_p] = acost;
3801 if (dump_file && (dump_flags & TDF_DETAILS))
3803 fprintf (dump_file, "Address costs:\n");
3805 for (i = 0; i < 16; i++)
3807 sym_p = i & 1;
3808 var_p = (i >> 1) & 1;
3809 off_p = (i >> 2) & 1;
3810 rat_p = (i >> 3) & 1;
3812 fprintf (dump_file, " ");
3813 if (sym_p)
3814 fprintf (dump_file, "sym + ");
3815 if (var_p)
3816 fprintf (dump_file, "var + ");
3817 if (off_p)
3818 fprintf (dump_file, "cst + ");
3819 if (rat_p)
3820 fprintf (dump_file, "rat * ");
3822 acost = data->costs[sym_p][var_p][off_p][rat_p];
3823 fprintf (dump_file, "index costs %d\n", acost);
3825 if (has_predec[mem_mode] || has_postdec[mem_mode]
3826 || has_preinc[mem_mode] || has_postinc[mem_mode])
3827 fprintf (dump_file, " May include autoinc/dec\n");
3828 fprintf (dump_file, "\n");
3831 address_cost_data_list[data_index] = data;
3834 bits = GET_MODE_BITSIZE (address_mode);
3835 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3836 offset &= mask;
3837 if ((offset >> (bits - 1) & 1))
3838 offset |= ~mask;
3839 s_offset = offset;
3841 autoinc = false;
3842 autoinc_type = AINC_NONE;
3843 msize = GET_MODE_SIZE (mem_mode);
3844 autoinc_offset = offset;
3845 if (stmt_after_inc)
3846 autoinc_offset += ratio * cstep;
3847 if (symbol_present || var_present || ratio != 1)
3848 autoinc = false;
3849 else
3851 if (has_postinc[mem_mode] && autoinc_offset == 0
3852 && msize == cstep)
3853 autoinc_type = AINC_POST_INC;
3854 else if (has_postdec[mem_mode] && autoinc_offset == 0
3855 && msize == -cstep)
3856 autoinc_type = AINC_POST_DEC;
3857 else if (has_preinc[mem_mode] && autoinc_offset == msize
3858 && msize == cstep)
3859 autoinc_type = AINC_PRE_INC;
3860 else if (has_predec[mem_mode] && autoinc_offset == -msize
3861 && msize == -cstep)
3862 autoinc_type = AINC_PRE_DEC;
3864 if (autoinc_type != AINC_NONE)
3865 autoinc = true;
3868 cost = 0;
3869 offset_p = (s_offset != 0
3870 && data->min_offset <= s_offset
3871 && s_offset <= data->max_offset);
3872 ratio_p = (ratio != 1
3873 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3875 if (ratio != 1 && !ratio_p)
3876 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3878 if (s_offset && !offset_p && !symbol_present)
3879 cost += add_cost (speed, address_mode);
3881 if (may_autoinc)
3882 *may_autoinc = autoinc;
3883 if (autoinc)
3884 acost = data->ainc_costs[autoinc_type];
3885 else
3886 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3887 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3888 return new_cost (cost + acost, complexity);
3891 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3892 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3893 calculating the operands of EXPR. Returns true if successful, and returns
3894 the cost in COST. */
3896 static bool
3897 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3898 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3900 comp_cost res;
3901 tree op1 = TREE_OPERAND (expr, 1);
3902 tree cst = TREE_OPERAND (mult, 1);
3903 tree multop = TREE_OPERAND (mult, 0);
3904 int m = exact_log2 (int_cst_value (cst));
3905 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3906 int as_cost, sa_cost;
3907 bool mult_in_op1;
3909 if (!(m >= 0 && m < maxm))
3910 return false;
3912 mult_in_op1 = operand_equal_p (op1, mult, 0);
3914 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3916 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3917 use that in preference to a shift insn followed by an add insn. */
3918 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3919 ? shiftadd_cost (speed, mode, m)
3920 : (mult_in_op1
3921 ? shiftsub1_cost (speed, mode, m)
3922 : shiftsub0_cost (speed, mode, m)));
3924 res = new_cost (MIN (as_cost, sa_cost), 0);
3925 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3927 STRIP_NOPS (multop);
3928 if (!is_gimple_val (multop))
3929 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3931 *cost = res;
3932 return true;
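/* Illustrative example (assumed costs): for EXPR x + y * 4 we have
   m == 2, so the choice above is between add + shift (as_cost) and a
   single shift-add instruction (sa_cost), e.g. an x86
   "lea (%rdi,%rsi,4)"-style combination; the cheaper one is used.  */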
3935 /* Estimates cost of forcing expression EXPR into a variable. */
3937 static comp_cost
3938 force_expr_to_var_cost (tree expr, bool speed)
3940 static bool costs_initialized = false;
3941 static unsigned integer_cost [2];
3942 static unsigned symbol_cost [2];
3943 static unsigned address_cost [2];
3944 tree op0, op1;
3945 comp_cost cost0, cost1, cost;
3946 machine_mode mode;
3948 if (!costs_initialized)
3950 tree type = build_pointer_type (integer_type_node);
3951 tree var, addr;
3952 rtx x;
3953 int i;
3955 var = create_tmp_var_raw (integer_type_node, "test_var");
3956 TREE_STATIC (var) = 1;
3957 x = produce_memory_decl_rtl (var, NULL);
3958 SET_DECL_RTL (var, x);
3960 addr = build1 (ADDR_EXPR, type, var);
3963 for (i = 0; i < 2; i++)
3965 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3966 2000), i);
3968 symbol_cost[i] = computation_cost (addr, i) + 1;
3970 address_cost[i]
3971 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3974 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3975 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3976 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3977 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3978 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3979 fprintf (dump_file, "\n");
3983 costs_initialized = true;
3986 STRIP_NOPS (expr);
3988 if (SSA_VAR_P (expr))
3989 return no_cost;
3991 if (is_gimple_min_invariant (expr))
3993 if (TREE_CODE (expr) == INTEGER_CST)
3994 return new_cost (integer_cost [speed], 0);
3996 if (TREE_CODE (expr) == ADDR_EXPR)
3998 tree obj = TREE_OPERAND (expr, 0);
4000 if (TREE_CODE (obj) == VAR_DECL
4001 || TREE_CODE (obj) == PARM_DECL
4002 || TREE_CODE (obj) == RESULT_DECL)
4003 return new_cost (symbol_cost [speed], 0);
4006 return new_cost (address_cost [speed], 0);
4009 switch (TREE_CODE (expr))
4011 case POINTER_PLUS_EXPR:
4012 case PLUS_EXPR:
4013 case MINUS_EXPR:
4014 case MULT_EXPR:
4015 op0 = TREE_OPERAND (expr, 0);
4016 op1 = TREE_OPERAND (expr, 1);
4017 STRIP_NOPS (op0);
4018 STRIP_NOPS (op1);
4019 break;
4021 CASE_CONVERT:
4022 case NEGATE_EXPR:
4023 op0 = TREE_OPERAND (expr, 0);
4024 STRIP_NOPS (op0);
4025 op1 = NULL_TREE;
4026 break;
4028 default:
4029 /* Just an arbitrary value, FIXME. */
4030 return new_cost (target_spill_cost[speed], 0);
4033 if (op0 == NULL_TREE
4034 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4035 cost0 = no_cost;
4036 else
4037 cost0 = force_expr_to_var_cost (op0, speed);
4039 if (op1 == NULL_TREE
4040 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4041 cost1 = no_cost;
4042 else
4043 cost1 = force_expr_to_var_cost (op1, speed);
4045 mode = TYPE_MODE (TREE_TYPE (expr));
4046 switch (TREE_CODE (expr))
4048 case POINTER_PLUS_EXPR:
4049 case PLUS_EXPR:
4050 case MINUS_EXPR:
4051 case NEGATE_EXPR:
4052 cost = new_cost (add_cost (speed, mode), 0);
4053 if (TREE_CODE (expr) != NEGATE_EXPR)
4055 tree mult = NULL_TREE;
4056 comp_cost sa_cost;
4057 if (TREE_CODE (op1) == MULT_EXPR)
4058 mult = op1;
4059 else if (TREE_CODE (op0) == MULT_EXPR)
4060 mult = op0;
4062 if (mult != NULL_TREE
4063 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4064 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4065 speed, &sa_cost))
4066 return sa_cost;
4068 break;
4070 CASE_CONVERT:
4072 tree inner_mode, outer_mode;
4073 outer_mode = TREE_TYPE (expr);
4074 inner_mode = TREE_TYPE (op0);
4075 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4076 TYPE_MODE (inner_mode), speed), 0);
4078 break;
4080 case MULT_EXPR:
4081 if (cst_and_fits_in_hwi (op0))
4082 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4083 mode, speed), 0);
4084 else if (cst_and_fits_in_hwi (op1))
4085 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4086 mode, speed), 0);
4087 else
4088 return new_cost (target_spill_cost [speed], 0);
4089 break;
4091 default:
4092 gcc_unreachable ();
4095 cost = add_costs (cost, cost0);
4096 cost = add_costs (cost, cost1);
4098 /* Bound the cost by target_spill_cost. The parts of complicated
4099 computations often are either loop invariant or at least can
4100 be shared between several iv uses, so letting this grow without
4101 limits would not give reasonable results. */
4102 if (cost.cost > (int) target_spill_cost [speed])
4103 cost.cost = target_spill_cost [speed];
4105 return cost;
4108 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4109 invariants the computation depends on. */
4111 static comp_cost
4112 force_var_cost (struct ivopts_data *data,
4113 tree expr, bitmap *depends_on)
4115 if (depends_on)
4117 fd_ivopts_data = data;
4118 walk_tree (&expr, find_depends, depends_on, NULL);
4121 return force_expr_to_var_cost (expr, data->speed);
4124 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4125 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4126 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4127 invariants the computation depends on. */
4129 static comp_cost
4130 split_address_cost (struct ivopts_data *data,
4131 tree addr, bool *symbol_present, bool *var_present,
4132 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4134 tree core;
4135 HOST_WIDE_INT bitsize;
4136 HOST_WIDE_INT bitpos;
4137 tree toffset;
4138 machine_mode mode;
4139 int unsignedp, volatilep;
4141 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4142 &unsignedp, &volatilep, false);
4144 if (toffset != 0
4145 || bitpos % BITS_PER_UNIT != 0
4146 || TREE_CODE (core) != VAR_DECL)
4148 *symbol_present = false;
4149 *var_present = true;
4150 fd_ivopts_data = data;
4151 walk_tree (&addr, find_depends, depends_on, NULL);
4152 return new_cost (target_spill_cost[data->speed], 0);
4155 *offset += bitpos / BITS_PER_UNIT;
4156 if (TREE_STATIC (core)
4157 || DECL_EXTERNAL (core))
4159 *symbol_present = true;
4160 *var_present = false;
4161 return no_cost;
4164 *symbol_present = false;
4165 *var_present = true;
4166 return no_cost;
4169 /* Estimates cost of expressing the difference of addresses E1 - E2 as
4170 var + symbol + offset. The value of the offset is added to OFFSET;
4171 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4172 part is missing. DEPENDS_ON is a set of the invariants the computation
4173 depends on. */
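/* E.g. for E1 = &a[4] and E2 = &a[0], ptr_difference_const folds the
   difference to a compile-time byte offset, which is simply added to
   *OFFSET at no cost.  */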
4175 static comp_cost
4176 ptr_difference_cost (struct ivopts_data *data,
4177 tree e1, tree e2, bool *symbol_present, bool *var_present,
4178 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4180 HOST_WIDE_INT diff = 0;
4181 aff_tree aff_e1, aff_e2;
4182 tree type;
4184 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4186 if (ptr_difference_const (e1, e2, &diff))
4188 *offset += diff;
4189 *symbol_present = false;
4190 *var_present = false;
4191 return no_cost;
4194 if (integer_zerop (e2))
4195 return split_address_cost (data, TREE_OPERAND (e1, 0),
4196 symbol_present, var_present, offset, depends_on);
4198 *symbol_present = false;
4199 *var_present = true;
4201 type = signed_type_for (TREE_TYPE (e1));
4202 tree_to_aff_combination (e1, type, &aff_e1);
4203 tree_to_aff_combination (e2, type, &aff_e2);
4204 aff_combination_scale (&aff_e2, -1);
4205 aff_combination_add (&aff_e1, &aff_e2);
4207 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4210 /* Estimates cost of expressing the difference E1 - E2 as
4211 var + symbol + offset. The value of the offset is added to OFFSET;
4212 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4213 part is missing. DEPENDS_ON is a set of the invariants the computation
4214 depends on. */
4216 static comp_cost
4217 difference_cost (struct ivopts_data *data,
4218 tree e1, tree e2, bool *symbol_present, bool *var_present,
4219 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4221 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4222 unsigned HOST_WIDE_INT off1, off2;
4223 aff_tree aff_e1, aff_e2;
4224 tree type;
4226 e1 = strip_offset (e1, &off1);
4227 e2 = strip_offset (e2, &off2);
4228 *offset += off1 - off2;
4230 STRIP_NOPS (e1);
4231 STRIP_NOPS (e2);
4233 if (TREE_CODE (e1) == ADDR_EXPR)
4234 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4235 offset, depends_on);
4236 *symbol_present = false;
4238 if (operand_equal_p (e1, e2, 0))
4240 *var_present = false;
4241 return no_cost;
4244 *var_present = true;
4246 if (integer_zerop (e2))
4247 return force_var_cost (data, e1, depends_on);
4249 if (integer_zerop (e1))
4251 comp_cost cost = force_var_cost (data, e2, depends_on);
4252 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4253 return cost;
4256 type = signed_type_for (TREE_TYPE (e1));
4257 tree_to_aff_combination (e1, type, &aff_e1);
4258 tree_to_aff_combination (e2, type, &aff_e2);
4259 aff_combination_scale (&aff_e2, -1);
4260 aff_combination_add (&aff_e1, &aff_e2);
4262 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4265 /* Returns true if AFF1 and AFF2 are identical. */
4267 static bool
4268 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4270 unsigned i;
4272 if (aff1->n != aff2->n)
4273 return false;
4275 for (i = 0; i < aff1->n; i++)
4277 if (aff1->elts[i].coef != aff2->elts[i].coef)
4278 return false;
4280 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4281 return false;
4283 return true;
4286 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4288 static int
4289 get_expr_id (struct ivopts_data *data, tree expr)
4291 struct iv_inv_expr_ent ent;
4292 struct iv_inv_expr_ent **slot;
4294 ent.expr = expr;
4295 ent.hash = iterative_hash_expr (expr, 0);
4296 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4297 if (*slot)
4298 return (*slot)->id;
4300 *slot = XNEW (struct iv_inv_expr_ent);
4301 (*slot)->expr = expr;
4302 (*slot)->hash = ent.hash;
4303 (*slot)->id = data->inv_expr_id++;
4304 return (*slot)->id;
4307 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4308 requires a new compiler-generated temporary. Returns -1 otherwise.
4309 ADDRESS_P is a flag indicating if the expression is for address
4310 computation. */
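/* For instance, when UBASE and CBASE strip down to the same SSA name or
   to addresses of the same symbol and RATIO is 1, the bases cancel and
   no temporary is needed, so -1 is returned; otherwise the affine form
   of UBASE - RATIO * CBASE is built and registered via get_expr_id.  */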
4312 static int
4313 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4314 tree cbase, HOST_WIDE_INT ratio,
4315 bool address_p)
4317 aff_tree ubase_aff, cbase_aff;
4318 tree expr, ub, cb;
4320 STRIP_NOPS (ubase);
4321 STRIP_NOPS (cbase);
4322 ub = ubase;
4323 cb = cbase;
4325 if ((TREE_CODE (ubase) == INTEGER_CST)
4326 && (TREE_CODE (cbase) == INTEGER_CST))
4327 return -1;
4329 /* Strips the constant part. */
4330 if (TREE_CODE (ubase) == PLUS_EXPR
4331 || TREE_CODE (ubase) == MINUS_EXPR
4332 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4334 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4335 ubase = TREE_OPERAND (ubase, 0);
4338 /* Strips the constant part. */
4339 if (TREE_CODE (cbase) == PLUS_EXPR
4340 || TREE_CODE (cbase) == MINUS_EXPR
4341 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4343 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4344 cbase = TREE_OPERAND (cbase, 0);
4347 if (address_p)
4349 if (((TREE_CODE (ubase) == SSA_NAME)
4350 || (TREE_CODE (ubase) == ADDR_EXPR
4351 && is_gimple_min_invariant (ubase)))
4352 && (TREE_CODE (cbase) == INTEGER_CST))
4353 return -1;
4355 if (((TREE_CODE (cbase) == SSA_NAME)
4356 || (TREE_CODE (cbase) == ADDR_EXPR
4357 && is_gimple_min_invariant (cbase)))
4358 && (TREE_CODE (ubase) == INTEGER_CST))
4359 return -1;
4362 if (ratio == 1)
4364 if (operand_equal_p (ubase, cbase, 0))
4365 return -1;
4367 if (TREE_CODE (ubase) == ADDR_EXPR
4368 && TREE_CODE (cbase) == ADDR_EXPR)
4370 tree usym, csym;
4372 usym = TREE_OPERAND (ubase, 0);
4373 csym = TREE_OPERAND (cbase, 0);
4374 if (TREE_CODE (usym) == ARRAY_REF)
4376 tree ind = TREE_OPERAND (usym, 1);
4377 if (TREE_CODE (ind) == INTEGER_CST
4378 && tree_fits_shwi_p (ind)
4379 && tree_to_shwi (ind) == 0)
4380 usym = TREE_OPERAND (usym, 0);
4382 if (TREE_CODE (csym) == ARRAY_REF)
4384 tree ind = TREE_OPERAND (csym, 1);
4385 if (TREE_CODE (ind) == INTEGER_CST
4386 && tree_fits_shwi_p (ind)
4387 && tree_to_shwi (ind) == 0)
4388 csym = TREE_OPERAND (csym, 0);
4390 if (operand_equal_p (usym, csym, 0))
4391 return -1;
4393 /* Now do a more complex comparison. */
4394 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4395 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4396 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4397 return -1;
4400 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4401 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4403 aff_combination_scale (&cbase_aff, -1 * ratio);
4404 aff_combination_add (&ubase_aff, &cbase_aff);
4405 expr = aff_combination_to_tree (&ubase_aff);
4406 return get_expr_id (data, expr);
4411 /* Determines the cost of the computation by which USE is expressed
4412 from induction variable CAND. If ADDRESS_P is true, we just need
4413 to create an address from it, otherwise we want to get it into a
4414 register. A set of invariants we depend on is stored in
4415 DEPENDS_ON. AT is the statement at which the value is computed.
4416 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4417 addressing is likely. */
4419 static comp_cost
4420 get_computation_cost_at (struct ivopts_data *data,
4421 struct iv_use *use, struct iv_cand *cand,
4422 bool address_p, bitmap *depends_on, gimple at,
4423 bool *can_autoinc,
4424 int *inv_expr_id)
4426 tree ubase = use->iv->base, ustep = use->iv->step;
4427 tree cbase, cstep;
4428 tree utype = TREE_TYPE (ubase), ctype;
4429 unsigned HOST_WIDE_INT cstepi, offset = 0;
4430 HOST_WIDE_INT ratio, aratio;
4431 bool var_present, symbol_present, stmt_is_after_inc;
4432 comp_cost cost;
4433 widest_int rat;
4434 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4435 machine_mode mem_mode = (address_p
4436 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4437 : VOIDmode);
4439 *depends_on = NULL;
4441 /* Only consider real candidates. */
4442 if (!cand->iv)
4443 return infinite_cost;
4445 cbase = cand->iv->base;
4446 cstep = cand->iv->step;
4447 ctype = TREE_TYPE (cbase);
4449 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4451 /* We do not have enough precision to express the values of the use. */
4452 return infinite_cost;
4455 if (address_p
4456 || (use->iv->base_object
4457 && cand->iv->base_object
4458 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4459 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4461 /* Do not try to express address of an object with computation based
4462 on address of a different object. This may cause problems in rtl
4463 level alias analysis (that does not expect this to be happening,
4464 as this is illegal in C), and would be unlikely to be useful
4465 anyway. */
4466 if (use->iv->base_object
4467 && cand->iv->base_object
4468 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4469 return infinite_cost;
4472 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4474 /* TODO -- add direct handling of this case. */
4475 goto fallback;
4478 /* CSTEPI is removed from the offset in case the statement is after the
4479 increment. If the step is not constant, we use zero instead.
4480 This is a bit imprecise (there is the extra addition), but
4481 redundancy elimination is likely to transform the code so that
4482 it uses the value of the variable before the increment anyway,
4483 so it is not all that unrealistic. */
4484 if (cst_and_fits_in_hwi (cstep))
4485 cstepi = int_cst_value (cstep);
4486 else
4487 cstepi = 0;
4489 if (!constant_multiple_of (ustep, cstep, &rat))
4490 return infinite_cost;
4492 if (wi::fits_shwi_p (rat))
4493 ratio = rat.to_shwi ();
4494 else
4495 return infinite_cost;
4497 STRIP_NOPS (cbase);
4498 ctype = TREE_TYPE (cbase);
4500 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4502 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4503 or ratio == 1, it is better to handle this like
4505 ubase - ratio * cbase + ratio * var
4507 (this also holds in the case ratio == -1, TODO).  */
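/* In other words, the loop-invariant part ubase - ratio * cbase is
   computed only once, which is why its cost is divided by the expected
   number of iterations below, and only ratio * var remains to be
   computed inside the loop.  */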
4509 if (cst_and_fits_in_hwi (cbase))
4511 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4512 cost = difference_cost (data,
4513 ubase, build_int_cst (utype, 0),
4514 &symbol_present, &var_present, &offset,
4515 depends_on);
4516 cost.cost /= avg_loop_niter (data->current_loop);
4518 else if (ratio == 1)
4520 tree real_cbase = cbase;
4522 /* Check to see if any adjustment is needed. */
4523 if (cstepi == 0 && stmt_is_after_inc)
4525 aff_tree real_cbase_aff;
4526 aff_tree cstep_aff;
4528 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4529 &real_cbase_aff);
4530 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4532 aff_combination_add (&real_cbase_aff, &cstep_aff);
4533 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4536 cost = difference_cost (data,
4537 ubase, real_cbase,
4538 &symbol_present, &var_present, &offset,
4539 depends_on);
4540 cost.cost /= avg_loop_niter (data->current_loop);
4542 else if (address_p
4543 && !POINTER_TYPE_P (ctype)
4544 && multiplier_allowed_in_address_p
4545 (ratio, mem_mode,
4546 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4548 cbase
4549 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4550 cost = difference_cost (data,
4551 ubase, cbase,
4552 &symbol_present, &var_present, &offset,
4553 depends_on);
4554 cost.cost /= avg_loop_niter (data->current_loop);
4556 else
4558 cost = force_var_cost (data, cbase, depends_on);
4559 cost = add_costs (cost,
4560 difference_cost (data,
4561 ubase, build_int_cst (utype, 0),
4562 &symbol_present, &var_present,
4563 &offset, depends_on));
4564 cost.cost /= avg_loop_niter (data->current_loop);
4565 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4568 /* The set of invariants depended on by a sub-use has already been
4569 computed for the first use in the group. */
4570 if (use->sub_id)
4572 cost.cost = 0;
4573 if (depends_on && *depends_on)
4574 bitmap_clear (*depends_on);
4576 else if (inv_expr_id)
4578 *inv_expr_id =
4579 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4580 /* Clear depends on. */
4581 if (*inv_expr_id != -1 && depends_on && *depends_on)
4582 bitmap_clear (*depends_on);
4585 /* If we are after the increment, the value of the candidate is higher by
4586 one iteration. */
4587 if (stmt_is_after_inc)
4588 offset -= ratio * cstepi;
4590 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4591 (symbol/var1/const parts may be omitted). If we are looking for an
4592 address, find the cost of addressing this. */
4593 if (address_p)
4594 return add_costs (cost,
4595 get_address_cost (symbol_present, var_present,
4596 offset, ratio, cstepi,
4597 mem_mode,
4598 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4599 speed, stmt_is_after_inc,
4600 can_autoinc));
4602 /* Otherwise estimate the costs for computing the expression. */
4603 if (!symbol_present && !var_present && !offset)
4605 if (ratio != 1)
4606 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4607 return cost;
4610 /* Symbol + offset should be compile-time computable, so assume they
4611 are added to the variable just once, if the variable is present. */
4612 if (var_present && (symbol_present || offset))
4613 cost.cost += adjust_setup_cost (data,
4614 add_cost (speed, TYPE_MODE (ctype)));
4616 /* Having offset does not affect runtime cost in case it is added to
4617 symbol, but it increases complexity. */
4618 if (offset)
4619 cost.complexity++;
4621 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4623 aratio = ratio > 0 ? ratio : -ratio;
4624 if (aratio != 1)
4625 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4626 return cost;
4628 fallback:
4629 if (can_autoinc)
4630 *can_autoinc = false;
4633 /* Just get the expression, expand it and measure the cost. */
4634 tree comp = get_computation_at (data->current_loop, use, cand, at);
4636 if (!comp)
4637 return infinite_cost;
4639 if (address_p)
4640 comp = build_simple_mem_ref (comp);
4642 return new_cost (computation_cost (comp, speed), 0);
4646 /* Determines the cost of the computation by which USE is expressed
4647 from induction variable CAND. If ADDRESS_P is true, we just need
4648 to create an address from it, otherwise we want to get it into a
4649 register. A set of invariants we depend on is stored in
4650 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4651 autoinc addressing is likely. */
4653 static comp_cost
4654 get_computation_cost (struct ivopts_data *data,
4655 struct iv_use *use, struct iv_cand *cand,
4656 bool address_p, bitmap *depends_on,
4657 bool *can_autoinc, int *inv_expr_id)
4659 return get_computation_cost_at (data,
4660 use, cand, address_p, depends_on, use->stmt,
4661 can_autoinc, inv_expr_id);
4664 /* Determines cost of basing replacement of USE on CAND in a generic
4665 expression. */
4667 static bool
4668 determine_use_iv_cost_generic (struct ivopts_data *data,
4669 struct iv_use *use, struct iv_cand *cand)
4671 bitmap depends_on;
4672 comp_cost cost;
4673 int inv_expr_id = -1;
4675 /* The simple case first -- if we need to express value of the preserved
4676 original biv, the cost is 0. This also prevents us from counting the
4677 cost of increment twice -- once at this use and once in the cost of
4678 the candidate. */
4679 if (cand->pos == IP_ORIGINAL
4680 && cand->incremented_at == use->stmt)
4682 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4683 ERROR_MARK, -1);
4684 return true;
4687 cost = get_computation_cost (data, use, cand, false, &depends_on,
4688 NULL, &inv_expr_id);
4690 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4691 inv_expr_id);
4693 return !infinite_cost_p (cost);
4696 /* Determines cost of basing replacement of USE on CAND in an address. */
4698 static bool
4699 determine_use_iv_cost_address (struct ivopts_data *data,
4700 struct iv_use *use, struct iv_cand *cand)
4702 bitmap depends_on;
4703 bool can_autoinc;
4704 int inv_expr_id = -1;
4705 struct iv_use *sub_use;
4706 comp_cost sub_cost;
4707 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4708 &can_autoinc, &inv_expr_id);
4710 if (cand->ainc_use == use)
4712 if (can_autoinc)
4713 cost.cost -= cand->cost_step;
4714 /* If we generated the candidate solely for exploiting autoincrement
4715 opportunities, and it turns out it can't be used, set the cost to
4716 infinity to make sure we ignore it. */
4717 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4718 cost = infinite_cost;
4720 for (sub_use = use->next;
4721 sub_use && !infinite_cost_p (cost);
4722 sub_use = sub_use->next)
4724 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4725 &can_autoinc, &inv_expr_id);
4726 cost = add_costs (cost, sub_cost);
4729 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4730 inv_expr_id);
4732 return !infinite_cost_p (cost);
4735 /* Computes value of candidate CAND at position AT in iteration NITER, and
4736 stores it to VAL. */
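/* Concretely, the value computed is BASE + NITER * STEP, plus one extra
   STEP when AT is after the candidate's increment; the arithmetic is
   carried out in an unsigned type so that it cannot overflow.  */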
4738 static void
4739 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4740 aff_tree *val)
4742 aff_tree step, delta, nit;
4743 struct iv *iv = cand->iv;
4744 tree type = TREE_TYPE (iv->base);
4745 /* Compute in an unsigned type so that the affine arithmetic below
4746 cannot overflow. */
4748 tree steptype = unsigned_type_for (type);
4750 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4751 aff_combination_convert (&step, steptype);
4752 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4753 aff_combination_convert (&nit, steptype);
4754 aff_combination_mult (&nit, &step, &delta);
4755 if (stmt_after_increment (loop, cand, at))
4756 aff_combination_add (&delta, &step);
4758 tree_to_aff_combination (iv->base, type, val);
4759 if (!POINTER_TYPE_P (type))
4760 aff_combination_convert (val, steptype);
4761 aff_combination_add (val, &delta);
4764 /* Returns period of induction variable iv. */
4766 static tree
4767 iv_period (struct iv *iv)
4769 tree step = iv->step, period, type;
4770 tree pow2div;
4772 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4774 type = unsigned_type_for (TREE_TYPE (step));
4775 /* Period of the iv is lcm (step, type_range)/step - 1,
4776 i.e., N*type_range/step - 1. Since the type range is a power
4777 of two, N == step >> num_of_ending_zeros_binary (step),
4778 so the final result is
4780 (type_range >> num_of_ending_zeros_binary (step)) - 1
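   For example, a 32-bit unsigned iv with step 4 has
   num_of_ending_zeros_binary (step) == 2, so its period is
   (1 << 30) - 1 -- the number of steps the iv can take before its
   value repeats.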
4783 pow2div = num_ending_zeros (step);
4785 period = build_low_bits_mask (type,
4786 (TYPE_PRECISION (type)
4787 - tree_to_uhwi (pow2div)));
4789 return period;
4792 /* Returns the comparison operator used when eliminating the iv USE. */
4794 static enum tree_code
4795 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4797 struct loop *loop = data->current_loop;
4798 basic_block ex_bb;
4799 edge exit;
4801 ex_bb = gimple_bb (use->stmt);
4802 exit = EDGE_SUCC (ex_bb, 0);
4803 if (flow_bb_inside_loop_p (loop, exit->dest))
4804 exit = EDGE_SUCC (ex_bb, 1);
4806 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4809 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4810 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4811 calculation is performed in a non-wrapping type.
4813 TODO: More generally, we could test for the situation that
4814 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4815 This would require knowing the sign of OFFSET. */
4817 static bool
4818 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4820 enum tree_code code;
4821 tree e1, e2;
4822 aff_tree aff_e1, aff_e2, aff_offset;
4824 if (!nowrap_type_p (TREE_TYPE (base)))
4825 return false;
4827 base = expand_simple_operations (base);
4829 if (TREE_CODE (base) == SSA_NAME)
4831 gimple stmt = SSA_NAME_DEF_STMT (base);
4833 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4834 return false;
4836 code = gimple_assign_rhs_code (stmt);
4837 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4838 return false;
4840 e1 = gimple_assign_rhs1 (stmt);
4841 e2 = gimple_assign_rhs2 (stmt);
4843 else
4845 code = TREE_CODE (base);
4846 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4847 return false;
4848 e1 = TREE_OPERAND (base, 0);
4849 e2 = TREE_OPERAND (base, 1);
4852 /* Use affine expansion as deeper inspection to prove the equality. */
4853 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4854 &aff_e2, &data->name_expansion_cache);
4855 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4856 &aff_offset, &data->name_expansion_cache);
4857 aff_combination_scale (&aff_offset, -1);
4858 switch (code)
4860 case PLUS_EXPR:
4861 aff_combination_add (&aff_e2, &aff_offset);
4862 if (aff_combination_zero_p (&aff_e2))
4863 return true;
4865 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4866 &aff_e1, &data->name_expansion_cache);
4867 aff_combination_add (&aff_e1, &aff_offset);
4868 return aff_combination_zero_p (&aff_e1);
4870 case POINTER_PLUS_EXPR:
4871 aff_combination_add (&aff_e2, &aff_offset);
4872 return aff_combination_zero_p (&aff_e2);
4874 default:
4875 return false;
4879 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4880 comparison with CAND. NITER describes the number of iterations of
4881 the loops. If successful, the comparison in COMP_P is altered accordingly.
4883 We aim to handle the following situation:
4885 sometype *base, *p;
4886 int a, b, i;
4888 i = a;
4889 p = p_0 = base + a;
4893 bla (*p);
4894 p++;
4895 i++;
4897 while (i < b);
4899 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4900 We aim to optimize this to
4902 p = p_0 = base + a;
4905 bla (*p);
4906 p++;
4908 while (p < p_0 - a + b);
4910 This preserves the correctness, since the pointer arithmetic does not
4911 overflow. More precisely:
4913 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4914 overflow in computing it or the values of p.
4915 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4916 overflow. To prove this, we use the fact that p_0 = base + a. */
4918 static bool
4919 iv_elimination_compare_lt (struct ivopts_data *data,
4920 struct iv_cand *cand, enum tree_code *comp_p,
4921 struct tree_niter_desc *niter)
4923 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4924 struct aff_tree nit, tmpa, tmpb;
4925 enum tree_code comp;
4926 HOST_WIDE_INT step;
4928 /* We need to know that the candidate induction variable does not overflow.
4929 While more complex analysis may be used to prove this, for now just
4930 check that the variable appears in the original program and that it
4931 is computed in a type that guarantees no overflows. */
4932 cand_type = TREE_TYPE (cand->iv->base);
4933 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4934 return false;
4936 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4937 the calculation of the BOUND could overflow, making the comparison
4938 invalid. */
4939 if (!data->loop_single_exit_p)
4940 return false;
4942 /* We need to be able to decide whether candidate is increasing or decreasing
4943 in order to choose the right comparison operator. */
4944 if (!cst_and_fits_in_hwi (cand->iv->step))
4945 return false;
4946 step = int_cst_value (cand->iv->step);
4948 /* Check that the number of iterations matches the expected pattern:
4949 a + 1 > b ? 0 : b - a - 1. */
4950 mbz = niter->may_be_zero;
4951 if (TREE_CODE (mbz) == GT_EXPR)
4953 /* Handle a + 1 > b. */
4954 tree op0 = TREE_OPERAND (mbz, 0);
4955 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4957 a = TREE_OPERAND (op0, 0);
4958 b = TREE_OPERAND (mbz, 1);
4960 else
4961 return false;
4963 else if (TREE_CODE (mbz) == LT_EXPR)
4965 tree op1 = TREE_OPERAND (mbz, 1);
4967 /* Handle b < a + 1. */
4968 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4970 a = TREE_OPERAND (op1, 0);
4971 b = TREE_OPERAND (mbz, 0);
4973 else
4974 return false;
4976 else
4977 return false;
4979 /* Expected number of iterations is B - A - 1. Check that it matches
4980 the actual number, i.e., that B - A - NITER = 1. */
4981 tree_to_aff_combination (niter->niter, nit_type, &nit);
4982 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4983 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4984 aff_combination_scale (&nit, -1);
4985 aff_combination_scale (&tmpa, -1);
4986 aff_combination_add (&tmpb, &tmpa);
4987 aff_combination_add (&tmpb, &nit);
4988 if (tmpb.n != 0 || tmpb.offset != 1)
4989 return false;
4991 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4992 overflow. */
4993 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4994 cand->iv->step,
4995 fold_convert (TREE_TYPE (cand->iv->step), a));
4996 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4997 return false;
4999 /* Determine the new comparison operator. */
5000 comp = step < 0 ? GT_EXPR : LT_EXPR;
5001 if (*comp_p == NE_EXPR)
5002 *comp_p = comp;
5003 else if (*comp_p == EQ_EXPR)
5004 *comp_p = invert_tree_comparison (comp, false);
5005 else
5006 gcc_unreachable ();
5008 return true;
5011 /* Check whether it is possible to express the condition in USE by comparison
5012 of candidate CAND. If so, store the value compared with to BOUND, and the
5013 comparison operator to COMP. */
5015 static bool
5016 may_eliminate_iv (struct ivopts_data *data,
5017 struct iv_use *use, struct iv_cand *cand, tree *bound,
5018 enum tree_code *comp)
5020 basic_block ex_bb;
5021 edge exit;
5022 tree period;
5023 struct loop *loop = data->current_loop;
5024 aff_tree bnd;
5025 struct tree_niter_desc *desc = NULL;
5027 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5028 return false;
5030 /* For now works only for exits that dominate the loop latch.
5031 TODO: extend to other conditions inside loop body. */
5032 ex_bb = gimple_bb (use->stmt);
5033 if (use->stmt != last_stmt (ex_bb)
5034 || gimple_code (use->stmt) != GIMPLE_COND
5035 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5036 return false;
5038 exit = EDGE_SUCC (ex_bb, 0);
5039 if (flow_bb_inside_loop_p (loop, exit->dest))
5040 exit = EDGE_SUCC (ex_bb, 1);
5041 if (flow_bb_inside_loop_p (loop, exit->dest))
5042 return false;
5044 desc = niter_for_exit (data, exit);
5045 if (!desc)
5046 return false;
5048 /* Determine whether we can use the variable to test the exit condition.
5049 This is the case iff the period of the induction variable is greater
5050 than the number of iterations for which the exit condition is true. */
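/* For example, an 8-bit unsigned iv with step 1 has period 255: if the
   exit test is placed after the increment and the loop may run 255
   times, the candidate wraps back past its base before the bound is
   reached, so the elimination is rejected.  */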
5051 period = iv_period (cand->iv);
5053 /* If the number of iterations is constant, compare against it directly. */
5054 if (TREE_CODE (desc->niter) == INTEGER_CST)
5056 /* See cand_value_at. */
5057 if (stmt_after_increment (loop, cand, use->stmt))
5059 if (!tree_int_cst_lt (desc->niter, period))
5060 return false;
5062 else
5064 if (tree_int_cst_lt (period, desc->niter))
5065 return false;
5069 /* If not, and if this is the only possible exit of the loop, see whether
5070 we can get a conservative estimate on the number of iterations of the
5071 entire loop and compare against that instead. */
5072 else
5074 widest_int period_value, max_niter;
5076 max_niter = desc->max;
5077 if (stmt_after_increment (loop, cand, use->stmt))
5078 max_niter += 1;
5079 period_value = wi::to_widest (period);
5080 if (wi::gtu_p (max_niter, period_value))
5082 /* See if we can take advantage of inferred loop bound information. */
5083 if (data->loop_single_exit_p)
5085 if (!max_loop_iterations (loop, &max_niter))
5086 return false;
5087 /* The loop bound is already adjusted by adding 1. */
5088 if (wi::gtu_p (max_niter, period_value))
5089 return false;
5091 else
5092 return false;
5096 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5098 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5099 aff_combination_to_tree (&bnd));
5100 *comp = iv_elimination_compare (data, use);
5102 /* It is unlikely that computing the number of iterations using division
5103 would be more profitable than keeping the original induction variable. */
5104 if (expression_expensive_p (*bound))
5105 return false;
5107 /* Sometimes it is possible to handle the situation that the number of
5108 iterations may be zero unless additional assumptions hold, by using
5109 < instead of != in the exit condition.
5111 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5112 base the exit condition on it. However, that is often too
5113 expensive. */
5114 if (!integer_zerop (desc->may_be_zero))
5115 return iv_elimination_compare_lt (data, cand, comp, desc);
5117 return true;
5120 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5121 be copied if it is used in the loop body and DATA->body_includes_call. */
5123 static int
5124 parm_decl_cost (struct ivopts_data *data, tree bound)
5126 tree sbound = bound;
5127 STRIP_NOPS (sbound);
5129 if (TREE_CODE (sbound) == SSA_NAME
5130 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5131 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5132 && data->body_includes_call)
5133 return COSTS_N_INSNS (1);
5135 return 0;
5138 /* Determines cost of basing replacement of USE on CAND in a condition. */
5140 static bool
5141 determine_use_iv_cost_condition (struct ivopts_data *data,
5142 struct iv_use *use, struct iv_cand *cand)
5144 tree bound = NULL_TREE;
5145 struct iv *cmp_iv;
5146 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5147 comp_cost elim_cost, express_cost, cost, bound_cost;
5148 bool ok;
5149 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5150 tree *control_var, *bound_cst;
5151 enum tree_code comp = ERROR_MARK;
5153 /* Only consider real candidates. */
5154 if (!cand->iv)
5156 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5157 ERROR_MARK, -1);
5158 return false;
5161 /* Try iv elimination. */
5162 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5164 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5165 if (elim_cost.cost == 0)
5166 elim_cost.cost = parm_decl_cost (data, bound);
5167 else if (TREE_CODE (bound) == INTEGER_CST)
5168 elim_cost.cost = 0;
5169 /* If we replace a loop condition 'i < n' with 'p < base + n',
5170 depends_on_elim will have 'base' and 'n' set, which implies
5171 that both 'base' and 'n' will be live during the loop. More likely,
5172 'base + n' will be loop invariant, resulting in only one live value
5173 during the loop. So in that case we clear depends_on_elim and set
5174 elim_inv_expr_id instead. */
5175 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5177 elim_inv_expr_id = get_expr_id (data, bound);
5178 bitmap_clear (depends_on_elim);
5180 /* The bound is a loop invariant, so it will be only computed
5181 once. */
5182 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5184 else
5185 elim_cost = infinite_cost;
5187 /* Try expressing the original giv. If it is compared with an invariant,
5188 note that we cannot get rid of it. */
5189 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5190 NULL, &cmp_iv);
5191 gcc_assert (ok);
5193 /* When the condition is a comparison of the candidate IV against
5194 zero, prefer this IV.
5196 TODO: The constant that we're subtracting from the cost should
5197 be target-dependent. This information should be added to the
5198 target costs for each backend. */
5199 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5200 && integer_zerop (*bound_cst)
5201 && (operand_equal_p (*control_var, cand->var_after, 0)
5202 || operand_equal_p (*control_var, cand->var_before, 0)))
5203 elim_cost.cost -= 1;
5205 express_cost = get_computation_cost (data, use, cand, false,
5206 &depends_on_express, NULL,
5207 &express_inv_expr_id);
5208 fd_ivopts_data = data;
5209 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5211 /* Count the cost of the original bound as well. */
5212 bound_cost = force_var_cost (data, *bound_cst, NULL);
5213 if (bound_cost.cost == 0)
5214 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5215 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5216 bound_cost.cost = 0;
5217 express_cost.cost += bound_cost.cost;
5219 /* Choose the better approach, preferring the eliminated IV. */
5220 if (compare_costs (elim_cost, express_cost) <= 0)
5222 cost = elim_cost;
5223 depends_on = depends_on_elim;
5224 depends_on_elim = NULL;
5225 inv_expr_id = elim_inv_expr_id;
5227 else
5229 cost = express_cost;
5230 depends_on = depends_on_express;
5231 depends_on_express = NULL;
5232 bound = NULL_TREE;
5233 comp = ERROR_MARK;
5234 inv_expr_id = express_inv_expr_id;
5237 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5239 if (depends_on_elim)
5240 BITMAP_FREE (depends_on_elim);
5241 if (depends_on_express)
5242 BITMAP_FREE (depends_on_express);
5244 return !infinite_cost_p (cost);
5247 /* Determines cost of basing replacement of USE on CAND. Returns false
5248 if USE cannot be based on CAND. */
5250 static bool
5251 determine_use_iv_cost (struct ivopts_data *data,
5252 struct iv_use *use, struct iv_cand *cand)
5254 switch (use->type)
5256 case USE_NONLINEAR_EXPR:
5257 return determine_use_iv_cost_generic (data, use, cand);
5259 case USE_ADDRESS:
5260 return determine_use_iv_cost_address (data, use, cand);
5262 case USE_COMPARE:
5263 return determine_use_iv_cost_condition (data, use, cand);
5265 default:
5266 gcc_unreachable ();
5270 /* Return true if get_computation_cost indicates that autoincrement is
5271 a possibility for the pair of USE and CAND, false otherwise. */
5273 static bool
5274 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5275 struct iv_cand *cand)
5277 bitmap depends_on;
5278 bool can_autoinc;
5279 comp_cost cost;
5281 if (use->type != USE_ADDRESS)
5282 return false;
5284 cost = get_computation_cost (data, use, cand, true, &depends_on,
5285 &can_autoinc, NULL);
5287 BITMAP_FREE (depends_on);
5289 return !infinite_cost_p (cost) && can_autoinc;
5292 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5293 use that allows autoincrement, and set their AINC_USE if possible. */
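/* That is, for each IP_ORIGINAL candidate we scan the basic block of its
   increment for the nearest address use before and after the increment
   (ordered by gimple uid) and pick one for which get_computation_cost
   reports that auto-increment addressing is possible.  */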
5295 static void
5296 set_autoinc_for_original_candidates (struct ivopts_data *data)
5298 unsigned i, j;
5300 for (i = 0; i < n_iv_cands (data); i++)
5302 struct iv_cand *cand = iv_cand (data, i);
5303 struct iv_use *closest_before = NULL;
5304 struct iv_use *closest_after = NULL;
5305 if (cand->pos != IP_ORIGINAL)
5306 continue;
5308 for (j = 0; j < n_iv_uses (data); j++)
5310 struct iv_use *use = iv_use (data, j);
5311 unsigned uid = gimple_uid (use->stmt);
5313 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5314 continue;
5316 if (uid < gimple_uid (cand->incremented_at)
5317 && (closest_before == NULL
5318 || uid > gimple_uid (closest_before->stmt)))
5319 closest_before = use;
5321 if (uid > gimple_uid (cand->incremented_at)
5322 && (closest_after == NULL
5323 || uid < gimple_uid (closest_after->stmt)))
5324 closest_after = use;
5327 if (closest_before != NULL
5328 && autoinc_possible_for_pair (data, closest_before, cand))
5329 cand->ainc_use = closest_before;
5330 else if (closest_after != NULL
5331 && autoinc_possible_for_pair (data, closest_after, cand))
5332 cand->ainc_use = closest_after;
5336 /* Finds the candidates for the induction variables. */
5338 static void
5339 find_iv_candidates (struct ivopts_data *data)
5341 /* Add commonly used ivs. */
5342 add_standard_iv_candidates (data);
5344 /* Add old induction variables. */
5345 add_old_ivs_candidates (data);
5347 /* Add induction variables derived from uses. */
5348 add_derived_ivs_candidates (data);
5350 set_autoinc_for_original_candidates (data);
5352 /* Record the important candidates. */
5353 record_important_candidates (data);
5356 /* Determines costs of basing the use of the iv on an iv candidate. */
5358 static void
5359 determine_use_iv_costs (struct ivopts_data *data)
5361 unsigned i, j;
5362 struct iv_use *use;
5363 struct iv_cand *cand;
5364 bitmap to_clear = BITMAP_ALLOC (NULL);
5366 alloc_use_cost_map (data);
5368 for (i = 0; i < n_iv_uses (data); i++)
5370 use = iv_use (data, i);
5372 if (data->consider_all_candidates)
5374 for (j = 0; j < n_iv_cands (data); j++)
5376 cand = iv_cand (data, j);
5377 determine_use_iv_cost (data, use, cand);
5380 else
5382 bitmap_iterator bi;
5384 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5386 cand = iv_cand (data, j);
5387 if (!determine_use_iv_cost (data, use, cand))
5388 bitmap_set_bit (to_clear, j);
5391 /* Remove the candidates for which the cost is infinite from
5392 the list of related candidates. */
5393 bitmap_and_compl_into (use->related_cands, to_clear);
5394 bitmap_clear (to_clear);
5398 BITMAP_FREE (to_clear);
5400 if (dump_file && (dump_flags & TDF_DETAILS))
5402 fprintf (dump_file, "Use-candidate costs:\n");
5404 for (i = 0; i < n_iv_uses (data); i++)
5406 use = iv_use (data, i);
5408 fprintf (dump_file, "Use %d:\n", i);
5409 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5410 for (j = 0; j < use->n_map_members; j++)
5412 if (!use->cost_map[j].cand
5413 || infinite_cost_p (use->cost_map[j].cost))
5414 continue;
5416 fprintf (dump_file, " %d\t%d\t%d\t",
5417 use->cost_map[j].cand->id,
5418 use->cost_map[j].cost.cost,
5419 use->cost_map[j].cost.complexity);
5420 if (use->cost_map[j].depends_on)
5421 bitmap_print (dump_file,
5422 use->cost_map[j].depends_on, "","");
5423 if (use->cost_map[j].inv_expr_id != -1)
5424 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5425 fprintf (dump_file, "\n");
5428 fprintf (dump_file, "\n");
5430 fprintf (dump_file, "\n");
5434 /* Determines cost of the candidate CAND. */
5436 static void
5437 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5439 comp_cost cost_base;
5440 unsigned cost, cost_step;
5441 tree base;
5443 if (!cand->iv)
5445 cand->cost = 0;
5446 return;
5449 /* There are two costs associated with the candidate -- its increment
5450 and its initialization. The second is almost negligible for any loop
5451 that rolls enough, so we give it only very little weight. */
5453 base = cand->iv->base;
5454 cost_base = force_var_cost (data, base, NULL);
5455 /* It will be exceptional that the iv register happens to be initialized with
5456 the proper value at no cost. In general, there will at least be a regcopy
5457 or a const set. */
5458 if (cost_base.cost == 0)
5459 cost_base.cost = COSTS_N_INSNS (1);
5460 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5462 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5464 /* Prefer the original ivs unless we may gain something by replacing it.
5465 The reason is to make debugging simpler; so this is not relevant for
5466 artificial ivs created by other optimization passes. */
5467 if (cand->pos != IP_ORIGINAL
5468 || !SSA_NAME_VAR (cand->var_before)
5469 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5470 cost++;
5472 /* Prefer not to insert statements into latch unless there are some
5473 already (so that we do not create unnecessary jumps). */
5474 if (cand->pos == IP_END
5475 && empty_block_p (ip_end_pos (data->current_loop)))
5476 cost++;
5478 cand->cost = cost;
5479 cand->cost_step = cost_step;
5482 /* Determines costs of computation of the candidates. */
5484 static void
5485 determine_iv_costs (struct ivopts_data *data)
5487 unsigned i;
5489 if (dump_file && (dump_flags & TDF_DETAILS))
5491 fprintf (dump_file, "Candidate costs:\n");
5492 fprintf (dump_file, " cand\tcost\n");
5495 for (i = 0; i < n_iv_cands (data); i++)
5497 struct iv_cand *cand = iv_cand (data, i);
5499 determine_iv_cost (data, cand);
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5502 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5505 if (dump_file && (dump_flags & TDF_DETAILS))
5506 fprintf (dump_file, "\n");
5509 /* Calculates cost for having SIZE induction variables. */
5511 static unsigned
5512 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5514 /* We add size to the cost, so that we prefer eliminating ivs
5515 if possible. */
5516 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5517 data->body_includes_call);
5520 /* For each size of the induction variable set determine the penalty. */
5522 static void
5523 determine_set_costs (struct ivopts_data *data)
5525 unsigned j, n;
5526 gphi *phi;
5527 gphi_iterator psi;
5528 tree op;
5529 struct loop *loop = data->current_loop;
5530 bitmap_iterator bi;
5532 if (dump_file && (dump_flags & TDF_DETAILS))
5534 fprintf (dump_file, "Global costs:\n");
5535 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5536 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5537 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5538 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5541 n = 0;
5542 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5544 phi = psi.phi ();
5545 op = PHI_RESULT (phi);
5547 if (virtual_operand_p (op))
5548 continue;
5550 if (get_iv (data, op))
5551 continue;
5553 n++;
5556 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5558 struct version_info *info = ver_info (data, j);
5560 if (info->inv_id && info->has_nonlin_use)
5561 n++;
5564 data->regs_used = n;
5565 if (dump_file && (dump_flags & TDF_DETAILS))
5566 fprintf (dump_file, " regs_used %d\n", n);
5568 if (dump_file && (dump_flags & TDF_DETAILS))
5570 fprintf (dump_file, " cost for size:\n");
5571 fprintf (dump_file, " ivs\tcost\n");
5572 for (j = 0; j <= 2 * target_avail_regs; j++)
5573 fprintf (dump_file, " %d\t%d\n", j,
5574 ivopts_global_cost_for_size (data, j));
5575 fprintf (dump_file, "\n");
5579 /* Returns true if A is a cheaper cost pair than B. */
5581 static bool
5582 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5584 int cmp;
5586 if (!a)
5587 return false;
5589 if (!b)
5590 return true;
5592 cmp = compare_costs (a->cost, b->cost);
5593 if (cmp < 0)
5594 return true;
5596 if (cmp > 0)
5597 return false;
5599 /* In case the costs are the same, prefer the cheaper candidate. */
5600 if (a->cand->cost < b->cand->cost)
5601 return true;
5603 return false;
5607 /* Returns the candidate by which USE is expressed in IVS. */
5609 static struct cost_pair *
5610 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5612 return ivs->cand_for_use[use->id];
5615 /* Computes the cost field of IVS structure. */
5617 static void
5618 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5620 comp_cost cost = ivs->cand_use_cost;
5622 cost.cost += ivs->cand_cost;
5624 cost.cost += ivopts_global_cost_for_size (data,
5625 ivs->n_regs + ivs->num_used_inv_expr);
5627 ivs->cost = cost;
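/* The total is thus: the per-use costs of the chosen cost pairs, plus
   the setup/increment costs of the selected candidates, plus the
   register-pressure estimate for the ivs, the used invariants and the
   named invariant expressions that stay live in the loop.  */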
5630 /* Remove the invariants in set INVS from set IVS. */
5632 static void
5633 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5635 bitmap_iterator bi;
5636 unsigned iid;
5638 if (!invs)
5639 return;
5641 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5643 ivs->n_invariant_uses[iid]--;
5644 if (ivs->n_invariant_uses[iid] == 0)
5645 ivs->n_regs--;
5649 /* Set USE not to be expressed by any candidate in IVS. */
5651 static void
5652 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5653 struct iv_use *use)
5655 unsigned uid = use->id, cid;
5656 struct cost_pair *cp;
5658 cp = ivs->cand_for_use[uid];
5659 if (!cp)
5660 return;
5661 cid = cp->cand->id;
5663 ivs->bad_uses++;
5664 ivs->cand_for_use[uid] = NULL;
5665 ivs->n_cand_uses[cid]--;
5667 if (ivs->n_cand_uses[cid] == 0)
5669 bitmap_clear_bit (ivs->cands, cid);
5670 /* Do not count the pseudocandidates. */
5671 if (cp->cand->iv)
5672 ivs->n_regs--;
5673 ivs->n_cands--;
5674 ivs->cand_cost -= cp->cand->cost;
5676 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5679 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5681 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5683 if (cp->inv_expr_id != -1)
5685 ivs->used_inv_expr[cp->inv_expr_id]--;
5686 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5687 ivs->num_used_inv_expr--;
5689 iv_ca_recount_cost (data, ivs);
5692 /* Add invariants in set INVS to set IVS. */
5694 static void
5695 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5697 bitmap_iterator bi;
5698 unsigned iid;
5700 if (!invs)
5701 return;
5703 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5705 ivs->n_invariant_uses[iid]++;
5706 if (ivs->n_invariant_uses[iid] == 1)
5707 ivs->n_regs++;
5711 /* Set cost pair for USE in set IVS to CP. */
5713 static void
5714 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5715 struct iv_use *use, struct cost_pair *cp)
5717 unsigned uid = use->id, cid;
5719 if (ivs->cand_for_use[uid] == cp)
5720 return;
5722 if (ivs->cand_for_use[uid])
5723 iv_ca_set_no_cp (data, ivs, use);
5725 if (cp)
5727 cid = cp->cand->id;
5729 ivs->bad_uses--;
5730 ivs->cand_for_use[uid] = cp;
5731 ivs->n_cand_uses[cid]++;
5732 if (ivs->n_cand_uses[cid] == 1)
5734 bitmap_set_bit (ivs->cands, cid);
5735 /* Do not count the pseudocandidates. */
5736 if (cp->cand->iv)
5737 ivs->n_regs++;
5738 ivs->n_cands++;
5739 ivs->cand_cost += cp->cand->cost;
5741 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5744 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5745 iv_ca_set_add_invariants (ivs, cp->depends_on);
5747 if (cp->inv_expr_id != -1)
5749 ivs->used_inv_expr[cp->inv_expr_id]++;
5750 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5751 ivs->num_used_inv_expr++;
5753 iv_ca_recount_cost (data, ivs);
5757 /* Extend set IVS by expressing USE by some of the candidates in it
5758 if possible. Consider all important candidates if candidates in
5759 set IVS don't give any result. */
5761 static void
5762 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5763 struct iv_use *use)
5765 struct cost_pair *best_cp = NULL, *cp;
5766 bitmap_iterator bi;
5767 unsigned i;
5768 struct iv_cand *cand;
5770 gcc_assert (ivs->upto >= use->id);
5771 ivs->upto++;
5772 ivs->bad_uses++;
5774 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5776 cand = iv_cand (data, i);
5777 cp = get_use_iv_cost (data, use, cand);
5778 if (cheaper_cost_pair (cp, best_cp))
5779 best_cp = cp;
5782 if (best_cp == NULL)
5784 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5786 cand = iv_cand (data, i);
5787 cp = get_use_iv_cost (data, use, cand);
5788 if (cheaper_cost_pair (cp, best_cp))
5789 best_cp = cp;
5793 iv_ca_set_cp (data, ivs, use, best_cp);
5796 /* Get cost for assignment IVS. */
5798 static comp_cost
5799 iv_ca_cost (struct iv_ca *ivs)
5801 /* This was a conditional expression but it triggered a bug in
5802 Sun C 5.5. */
5803 if (ivs->bad_uses)
5804 return infinite_cost;
5805 else
5806 return ivs->cost;
5809 /* Returns true if all dependences of CP are among invariants in IVS. */
5811 static bool
5812 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5814 unsigned i;
5815 bitmap_iterator bi;
5817 if (!cp->depends_on)
5818 return true;
5820 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5822 if (ivs->n_invariant_uses[i] == 0)
5823 return false;
5826 return true;
5829 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5830 chains it before NEXT_CHANGE. */
5832 static struct iv_ca_delta *
5833 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5834 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5836 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5838 change->use = use;
5839 change->old_cp = old_cp;
5840 change->new_cp = new_cp;
5841 change->next_change = next_change;
5843 return change;
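/* A delta list built this way is used as a transaction log: iv_ca_extend
   and iv_ca_narrow record tentative (use, old_cp, new_cp) changes here,
   iv_ca_delta_commit applies or reverts them, and iv_ca_delta_free
   releases the records once a decision has been made.  */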
5846 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5847 are rewritten. */
5849 static struct iv_ca_delta *
5850 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5852 struct iv_ca_delta *last;
5854 if (!l2)
5855 return l1;
5857 if (!l1)
5858 return l2;
5860 for (last = l1; last->next_change; last = last->next_change)
5861 continue;
5862 last->next_change = l2;
5864 return l1;
5867 /* Reverse the list of changes DELTA, forming the inverse to it. */
5869 static struct iv_ca_delta *
5870 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5872 struct iv_ca_delta *act, *next, *prev = NULL;
5874 for (act = delta; act; act = next)
5876 next = act->next_change;
5877 act->next_change = prev;
5878 prev = act;
5880 std::swap (act->old_cp, act->new_cp);
5883 return prev;
5886 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5887 reverted instead. */
5889 static void
5890 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5891 struct iv_ca_delta *delta, bool forward)
5893 struct cost_pair *from, *to;
5894 struct iv_ca_delta *act;
5896 if (!forward)
5897 delta = iv_ca_delta_reverse (delta);
5899 for (act = delta; act; act = act->next_change)
5901 from = act->old_cp;
5902 to = act->new_cp;
5903 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5904 iv_ca_set_cp (data, ivs, act->use, to);
5907 if (!forward)
5908 iv_ca_delta_reverse (delta);
5911 /* Returns true if CAND is used in IVS. */
5913 static bool
5914 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5916 return ivs->n_cand_uses[cand->id] > 0;
5919 /* Returns number of induction variable candidates in the set IVS. */
5921 static unsigned
5922 iv_ca_n_cands (struct iv_ca *ivs)
5924 return ivs->n_cands;
5927 /* Free the list of changes DELTA. */
5929 static void
5930 iv_ca_delta_free (struct iv_ca_delta **delta)
5932 struct iv_ca_delta *act, *next;
5934 for (act = *delta; act; act = next)
5936 next = act->next_change;
5937 free (act);
5940 *delta = NULL;
5943 /* Allocates new iv candidates assignment. */
5945 static struct iv_ca *
5946 iv_ca_new (struct ivopts_data *data)
5948 struct iv_ca *nw = XNEW (struct iv_ca);
5950 nw->upto = 0;
5951 nw->bad_uses = 0;
5952 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5953 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5954 nw->cands = BITMAP_ALLOC (NULL);
5955 nw->n_cands = 0;
5956 nw->n_regs = 0;
5957 nw->cand_use_cost = no_cost;
5958 nw->cand_cost = 0;
5959 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5960 nw->cost = no_cost;
5961 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5962 nw->num_used_inv_expr = 0;
5964 return nw;
5967 /* Free memory occupied by the set IVS. */
5969 static void
5970 iv_ca_free (struct iv_ca **ivs)
5972 free ((*ivs)->cand_for_use);
5973 free ((*ivs)->n_cand_uses);
5974 BITMAP_FREE ((*ivs)->cands);
5975 free ((*ivs)->n_invariant_uses);
5976 free ((*ivs)->used_inv_expr);
5977 free (*ivs);
5978 *ivs = NULL;
5981 /* Dumps IVS to FILE. */
5983 static void
5984 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5986 const char *pref = " invariants ";
5987 unsigned i;
5988 comp_cost cost = iv_ca_cost (ivs);
5990 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5991 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5992 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5993 bitmap_print (file, ivs->cands, " candidates: ","\n");
5995 for (i = 0; i < ivs->upto; i++)
5997 struct iv_use *use = iv_use (data, i);
5998 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5999 if (cp)
6000 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6001 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6002 else
6003 fprintf (file, " use:%d --> ??\n", use->id);
6006 for (i = 1; i <= data->max_inv_id; i++)
6007 if (ivs->n_invariant_uses[i])
6009 fprintf (file, "%s%d", pref, i);
6010 pref = ", ";
6012 fprintf (file, "\n\n");
6015 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6016 new set, and store differences in DELTA. Number of induction variables
6017 in the new set is stored to N_IVS. When MIN_NCAND is true, the function
6018 will try to find a solution with a minimal number of iv candidates. */
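/* Note that the delta is only committed temporarily below: the cost of
   the tentative set is measured and the changes are immediately rolled
   back, so IVS itself is left unchanged and the decision is recorded in
   *DELTA for the caller.  */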
6020 static comp_cost
6021 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6022 struct iv_cand *cand, struct iv_ca_delta **delta,
6023 unsigned *n_ivs, bool min_ncand)
6025 unsigned i;
6026 comp_cost cost;
6027 struct iv_use *use;
6028 struct cost_pair *old_cp, *new_cp;
6030 *delta = NULL;
6031 for (i = 0; i < ivs->upto; i++)
6033 use = iv_use (data, i);
6034 old_cp = iv_ca_cand_for_use (ivs, use);
6036 if (old_cp
6037 && old_cp->cand == cand)
6038 continue;
6040 new_cp = get_use_iv_cost (data, use, cand);
6041 if (!new_cp)
6042 continue;
6044 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6045 continue;
6047 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6048 continue;
6050 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6053 iv_ca_delta_commit (data, ivs, *delta, true);
6054 cost = iv_ca_cost (ivs);
6055 if (n_ivs)
6056 *n_ivs = iv_ca_n_cands (ivs);
6057 iv_ca_delta_commit (data, ivs, *delta, false);
6059 return cost;
6062 /* Try narrowing set IVS by removing CAND. Return the cost of
6063 the new set and store the differences in DELTA. START is
6064 the candidate with which we start narrowing. */
6066 static comp_cost
6067 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6068 struct iv_cand *cand, struct iv_cand *start,
6069 struct iv_ca_delta **delta)
6071 unsigned i, ci;
6072 struct iv_use *use;
6073 struct cost_pair *old_cp, *new_cp, *cp;
6074 bitmap_iterator bi;
6075 struct iv_cand *cnd;
6076 comp_cost cost, best_cost, acost;
6078 *delta = NULL;
6079 for (i = 0; i < n_iv_uses (data); i++)
6081 use = iv_use (data, i);
6083 old_cp = iv_ca_cand_for_use (ivs, use);
6084 if (old_cp->cand != cand)
6085 continue;
6087 best_cost = iv_ca_cost (ivs);
6088 /* Start narrowing with START. */
6089 new_cp = get_use_iv_cost (data, use, start);
6091 if (data->consider_all_candidates)
6093 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6095 if (ci == cand->id || (start && ci == start->id))
6096 continue;
6098 cnd = iv_cand (data, ci);
6100 cp = get_use_iv_cost (data, use, cnd);
6101 if (!cp)
6102 continue;
6104 iv_ca_set_cp (data, ivs, use, cp);
6105 acost = iv_ca_cost (ivs);
6107 if (compare_costs (acost, best_cost) < 0)
6109 best_cost = acost;
6110 new_cp = cp;
6114 else
6116 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6118 if (ci == cand->id || (start && ci == start->id))
6119 continue;
6121 cnd = iv_cand (data, ci);
6123 cp = get_use_iv_cost (data, use, cnd);
6124 if (!cp)
6125 continue;
6127 iv_ca_set_cp (data, ivs, use, cp);
6128 acost = iv_ca_cost (ivs);
6130 if (compare_costs (acost, best_cost) < 0)
6132 best_cost = acost;
6133 new_cp = cp;
6137 /* Restore to old cp for use. */
6138 iv_ca_set_cp (data, ivs, use, old_cp);
6140 if (!new_cp)
6142 iv_ca_delta_free (delta);
6143 return infinite_cost;
6146 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6149 iv_ca_delta_commit (data, ivs, *delta, true);
6150 cost = iv_ca_cost (ivs);
6151 iv_ca_delta_commit (data, ivs, *delta, false);
6153 return cost;
6156 /* Try optimizing the set of candidates IVS by removing candidates other
6157 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6158 differences in DELTA. */
6160 static comp_cost
6161 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6162 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6164 bitmap_iterator bi;
6165 struct iv_ca_delta *act_delta, *best_delta;
6166 unsigned i;
6167 comp_cost best_cost, acost;
6168 struct iv_cand *cand;
6170 best_delta = NULL;
6171 best_cost = iv_ca_cost (ivs);
6173 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6175 cand = iv_cand (data, i);
6177 if (cand == except_cand)
6178 continue;
6180 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6182 if (compare_costs (acost, best_cost) < 0)
6184 best_cost = acost;
6185 iv_ca_delta_free (&best_delta);
6186 best_delta = act_delta;
6188 else
6189 iv_ca_delta_free (&act_delta);
6192 if (!best_delta)
6194 *delta = NULL;
6195 return best_cost;
6198 /* Recurse to possibly remove other unnecessary ivs. */
6199 iv_ca_delta_commit (data, ivs, best_delta, true);
6200 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6201 iv_ca_delta_commit (data, ivs, best_delta, false);
6202 *delta = iv_ca_delta_join (best_delta, *delta);
6203 return best_cost;
6206 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6207 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6208 the corresponding cost_pair; otherwise just return BEST_CP. */
6210 static struct cost_pair*
6211 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6212 unsigned int cand_idx, struct iv_cand *old_cand,
6213 struct cost_pair *best_cp)
6215 struct iv_cand *cand;
6216 struct cost_pair *cp;
6218 gcc_assert (old_cand != NULL && best_cp != NULL);
6219 if (cand_idx == old_cand->id)
6220 return best_cp;
6222 cand = iv_cand (data, cand_idx);
6223 cp = get_use_iv_cost (data, use, cand);
6224 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6225 return cp;
6227 return best_cp;
6230 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6231 that are used by more than one iv use. For each of those candidates,
6232 this function tries to represent the iv uses under that candidate using
6233 other candidates with lower local cost, then tries to prune the new set.
6234 If the new set has a lower cost, it returns the new cost after recording
6235 the candidate replacement in list DELTA. */
6237 static comp_cost
6238 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6239 struct iv_ca_delta **delta)
6241 bitmap_iterator bi, bj;
6242 unsigned int i, j, k;
6243 struct iv_use *use;
6244 struct iv_cand *cand;
6245 comp_cost orig_cost, acost;
6246 struct iv_ca_delta *act_delta, *tmp_delta;
6247 struct cost_pair *old_cp, *best_cp = NULL;
6249 *delta = NULL;
6250 orig_cost = iv_ca_cost (ivs);
6252 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6254 if (ivs->n_cand_uses[i] == 1
6255 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6256 continue;
6258 cand = iv_cand (data, i);
6260 act_delta = NULL;
6261 /* Represent the uses under the current candidate using other candidates
6262 with lower local cost. */
6263 for (j = 0; j < ivs->upto; j++)
6265 use = iv_use (data, j);
6266 old_cp = iv_ca_cand_for_use (ivs, use);
6268 if (old_cp->cand != cand)
6269 continue;
6271 best_cp = old_cp;
6272 if (data->consider_all_candidates)
6273 for (k = 0; k < n_iv_cands (data); k++)
6274 best_cp = cheaper_cost_with_cand (data, use, k,
6275 old_cp->cand, best_cp);
6276 else
6277 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6278 best_cp = cheaper_cost_with_cand (data, use, k,
6279 old_cp->cand, best_cp);
6281 if (best_cp == old_cp)
6282 continue;
6284 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6286 /* Nothing was replaced, so there is nothing to prune. */
6287 if (!act_delta)
6288 continue;
6290 /* Prune the new candidate set. */
6291 iv_ca_delta_commit (data, ivs, act_delta, true);
6292 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6293 iv_ca_delta_commit (data, ivs, act_delta, false);
6294 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6296 if (compare_costs (acost, orig_cost) < 0)
6298 *delta = act_delta;
6299 return acost;
6301 else
6302 iv_ca_delta_free (&act_delta);
6305 return orig_cost;
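/* An illustrative source-level example (hypothetical function, not taken
   from the compiler) of the situation this replacement targets: the two
   address uses below step by different strides, so, depending on the
   target's addressing modes, forcing both onto one shared biv can make
   one of them pay for a scaled address or an extra add, while a pointer
   iv per array matches each use exactly. */

void
scale2 (float *a, double *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[i] *= 2.0f;	/* address use with stride 4. */
      b[i] *= 2.0;	/* address use with stride 8. */
    }
}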
6308 /* Tries to extend the set IVS in the best possible way in order
6309 to express the USE. If ORIGINALP is true, prefer candidates from
6310 the original set of IVs, otherwise favor important candidates not
6311 based on any memory object. */
6313 static bool
6314 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6315 struct iv_use *use, bool originalp)
6317 comp_cost best_cost, act_cost;
6318 unsigned i;
6319 bitmap_iterator bi;
6320 struct iv_cand *cand;
6321 struct iv_ca_delta *best_delta = NULL, *act_delta;
6322 struct cost_pair *cp;
6324 iv_ca_add_use (data, ivs, use);
6325 best_cost = iv_ca_cost (ivs);
6326 cp = iv_ca_cand_for_use (ivs, use);
6327 if (cp)
6329 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6330 iv_ca_set_no_cp (data, ivs, use);
6333 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6334 first try important candidates not based on any memory object. Only if
6335 this fails do we try the specific ones. Rationale -- in loops with many
6336 variables the best choice is often to use just one generic biv. If we
6337 added many ivs specific to the uses here, the optimization algorithm
6338 would later be likely to get stuck in a local minimum, causing us to
6339 create too many ivs. Going from few ivs to more seems more likely to be
6340 successful -- starting from few ivs, replacing an expensive use by a
6341 specific iv should always be a win. */
6342 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6344 cand = iv_cand (data, i);
6346 if (originalp && cand->pos != IP_ORIGINAL)
6347 continue;
6349 if (!originalp && cand->iv->base_object != NULL_TREE)
6350 continue;
6352 if (iv_ca_cand_used_p (ivs, cand))
6353 continue;
6355 cp = get_use_iv_cost (data, use, cand);
6356 if (!cp)
6357 continue;
6359 iv_ca_set_cp (data, ivs, use, cp);
6360 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6361 true);
6362 iv_ca_set_no_cp (data, ivs, use);
6363 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6365 if (compare_costs (act_cost, best_cost) < 0)
6367 best_cost = act_cost;
6369 iv_ca_delta_free (&best_delta);
6370 best_delta = act_delta;
6372 else
6373 iv_ca_delta_free (&act_delta);
6376 if (infinite_cost_p (best_cost))
6378 for (i = 0; i < use->n_map_members; i++)
6380 cp = use->cost_map + i;
6381 cand = cp->cand;
6382 if (!cand)
6383 continue;
6385 /* Already tried this. */
6386 if (cand->important)
6388 if (originalp && cand->pos == IP_ORIGINAL)
6389 continue;
6390 if (!originalp && cand->iv->base_object == NULL_TREE)
6391 continue;
6394 if (iv_ca_cand_used_p (ivs, cand))
6395 continue;
6397 act_delta = NULL;
6398 iv_ca_set_cp (data, ivs, use, cp);
6399 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6400 iv_ca_set_no_cp (data, ivs, use);
6401 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6402 cp, act_delta);
6404 if (compare_costs (act_cost, best_cost) < 0)
6406 best_cost = act_cost;
6408 if (best_delta)
6409 iv_ca_delta_free (&best_delta);
6410 best_delta = act_delta;
6412 else
6413 iv_ca_delta_free (&act_delta);
6417 iv_ca_delta_commit (data, ivs, best_delta, true);
6418 iv_ca_delta_free (&best_delta);
6420 return !infinite_cost_p (best_cost);
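/* An illustrative source-level example (hypothetical function, not taken
   from the compiler) of the rationale above: all three address uses below
   are expressible as base + 4*i from one generic counter, so starting the
   search from a single important candidate avoids seeding three per-use
   pointer ivs that would only raise register pressure. */

void
vadd (float *x, float *y, float *z, int n)
{
  int i;
  for (i = 0; i < n; i++)
    z[i] = x[i] + y[i];
}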
6423 /* Finds an initial assignment of candidates to uses. */
6425 static struct iv_ca *
6426 get_initial_solution (struct ivopts_data *data, bool originalp)
6428 struct iv_ca *ivs = iv_ca_new (data);
6429 unsigned i;
6431 for (i = 0; i < n_iv_uses (data); i++)
6432 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6434 iv_ca_free (&ivs);
6435 return NULL;
6438 return ivs;
6441 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6442 points to a bool variable; if it is true, this function tries to break
6443 the local optimal fixed-point by replacing candidates in IVS. */
6445 static bool
6446 try_improve_iv_set (struct ivopts_data *data,
6447 struct iv_ca *ivs, bool *try_replace_p)
6449 unsigned i, n_ivs;
6450 comp_cost acost, best_cost = iv_ca_cost (ivs);
6451 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6452 struct iv_cand *cand;
6454 /* Try extending the set of induction variables by one. */
6455 for (i = 0; i < n_iv_cands (data); i++)
6457 cand = iv_cand (data, i);
6459 if (iv_ca_cand_used_p (ivs, cand))
6460 continue;
6462 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6463 if (!act_delta)
6464 continue;
6466 /* If we successfully added the candidate and the set is small enough,
6467 try optimizing it by removing other candidates. */
6468 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6470 iv_ca_delta_commit (data, ivs, act_delta, true);
6471 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6472 iv_ca_delta_commit (data, ivs, act_delta, false);
6473 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6476 if (compare_costs (acost, best_cost) < 0)
6478 best_cost = acost;
6479 iv_ca_delta_free (&best_delta);
6480 best_delta = act_delta;
6482 else
6483 iv_ca_delta_free (&act_delta);
6486 if (!best_delta)
6488 /* Try removing the candidates from the set instead. */
6489 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6491 if (!best_delta && *try_replace_p)
6493 *try_replace_p = false;
6494 /* So far the candidate selection algorithm tends to choose fewer IVs,
6495 so that it can handle cases in which loops have many variables
6496 but the best choice is often to use only one general biv. One
6497 weakness is that it can't handle the opposite case, in which different
6498 candidates should be chosen with respect to each use. To solve
6499 this problem, we replace candidates in the manner described in the
6500 comments of iv_ca_replace, thus giving the general algorithm a chance
6501 to break the local optimal fixed-point in these cases. */
6502 best_cost = iv_ca_replace (data, ivs, &best_delta);
6505 if (!best_delta)
6506 return false;
6509 iv_ca_delta_commit (data, ivs, best_delta, true);
6510 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6511 iv_ca_delta_free (&best_delta);
6512 return true;
6515 /* Attempts to find the optimal set of induction variables. We use a simple
6516 greedy heuristic -- we try to replace at most one candidate in the selected
6517 solution and remove the unused ivs while this improves the cost. */
6519 static struct iv_ca *
6520 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6522 struct iv_ca *set;
6523 bool try_replace_p = true;
6525 /* Get the initial solution. */
6526 set = get_initial_solution (data, originalp);
6527 if (!set)
6529 if (dump_file && (dump_flags & TDF_DETAILS))
6530 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6531 return NULL;
6534 if (dump_file && (dump_flags & TDF_DETAILS))
6536 fprintf (dump_file, "Initial set of candidates:\n");
6537 iv_ca_dump (data, dump_file, set);
6540 while (try_improve_iv_set (data, set, &try_replace_p))
6542 if (dump_file && (dump_flags & TDF_DETAILS))
6544 fprintf (dump_file, "Improved to:\n");
6545 iv_ca_dump (data, dump_file, set);
6549 return set;
6552 static struct iv_ca *
6553 find_optimal_iv_set (struct ivopts_data *data)
6555 unsigned i;
6556 struct iv_ca *set, *origset;
6557 struct iv_use *use;
6558 comp_cost cost, origcost;
6560 /* Determine the cost using a strategy that starts with the original IVs,
6561 then try again using a strategy that prefers candidates not based
6562 on any IVs. */
6563 origset = find_optimal_iv_set_1 (data, true);
6564 set = find_optimal_iv_set_1 (data, false);
6566 if (!origset && !set)
6567 return NULL;
6569 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6570 cost = set ? iv_ca_cost (set) : infinite_cost;
6572 if (dump_file && (dump_flags & TDF_DETAILS))
6574 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6575 origcost.cost, origcost.complexity);
6576 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6577 cost.cost, cost.complexity);
6580 /* Choose the one with the best cost. */
6581 if (compare_costs (origcost, cost) <= 0)
6583 if (set)
6584 iv_ca_free (&set);
6585 set = origset;
6587 else if (origset)
6588 iv_ca_free (&origset);
6590 for (i = 0; i < n_iv_uses (data); i++)
6592 use = iv_use (data, i);
6593 use->selected = iv_ca_cand_for_use (set, use)->cand;
6596 return set;
6599 /* Creates a new induction variable corresponding to CAND. */
6601 static void
6602 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6604 gimple_stmt_iterator incr_pos;
6605 tree base;
6606 bool after = false;
6608 if (!cand->iv)
6609 return;
6611 switch (cand->pos)
6613 case IP_NORMAL:
6614 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6615 break;
6617 case IP_END:
6618 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6619 after = true;
6620 break;
6622 case IP_AFTER_USE:
6623 after = true;
6624 /* fall through */
6625 case IP_BEFORE_USE:
6626 incr_pos = gsi_for_stmt (cand->incremented_at);
6627 break;
6629 case IP_ORIGINAL:
6630 /* Mark that the iv is preserved. */
6631 name_info (data, cand->var_before)->preserve_biv = true;
6632 name_info (data, cand->var_after)->preserve_biv = true;
6634 /* Rewrite the increment so that it uses var_before directly. */
6635 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6636 return;
6639 gimple_add_tmp_var (cand->var_before);
6641 base = unshare_expr (cand->iv->base);
6643 create_iv (base, unshare_expr (cand->iv->step),
6644 cand->var_before, data->current_loop,
6645 &incr_pos, after, &cand->var_before, &cand->var_after);
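/* A rough sketch (simplified GIMPLE, assuming IP_NORMAL placement and
   hypothetical SSA names) of what create_iv materializes for a candidate
   with base B and step S:

     # ivtmp_1 = PHI <B(preheader), ivtmp_2(latch)>
     ...
     ivtmp_2 = ivtmp_1 + S;

   with cand->var_before bound to ivtmp_1 and cand->var_after to
   ivtmp_2. */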
6648 /* Creates new induction variables described in SET. */
6650 static void
6651 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6653 unsigned i;
6654 struct iv_cand *cand;
6655 bitmap_iterator bi;
6657 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6659 cand = iv_cand (data, i);
6660 create_new_iv (data, cand);
6663 if (dump_file && (dump_flags & TDF_DETAILS))
6665 fprintf (dump_file, "Selected IV set for loop %d",
6666 data->current_loop->num);
6667 if (data->loop_loc != UNKNOWN_LOCATION)
6668 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6669 LOCATION_LINE (data->loop_loc));
6670 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6671 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6673 cand = iv_cand (data, i);
6674 dump_cand (dump_file, cand);
6676 fprintf (dump_file, "\n");
6680 /* Rewrites USE (definition of iv used in a nonlinear expression)
6681 using candidate CAND. */
6683 static void
6684 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6685 struct iv_use *use, struct iv_cand *cand)
6687 tree comp;
6688 tree op, tgt;
6689 gassign *ass;
6690 gimple_stmt_iterator bsi;
6692 /* An important special case -- if we are asked to express the value of
6693 the original iv by itself, just exit; there is no need to
6694 introduce a new computation (that might also need casting the
6695 variable to unsigned and back). */
6696 if (cand->pos == IP_ORIGINAL
6697 && cand->incremented_at == use->stmt)
6699 enum tree_code stmt_code;
6701 gcc_assert (is_gimple_assign (use->stmt));
6702 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6704 /* Check whether we may leave the computation unchanged.
6705 This is the case only if it does not rely on other
6706 computations in the loop -- otherwise, the computation
6707 we rely upon may be removed in remove_unused_ivs,
6708 thus leading to an ICE. */
6709 stmt_code = gimple_assign_rhs_code (use->stmt);
6710 if (stmt_code == PLUS_EXPR
6711 || stmt_code == MINUS_EXPR
6712 || stmt_code == POINTER_PLUS_EXPR)
6714 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6715 op = gimple_assign_rhs2 (use->stmt);
6716 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6717 op = gimple_assign_rhs1 (use->stmt);
6718 else
6719 op = NULL_TREE;
6721 else
6722 op = NULL_TREE;
6724 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6725 return;
6728 comp = get_computation (data->current_loop, use, cand);
6729 gcc_assert (comp != NULL_TREE);
6731 switch (gimple_code (use->stmt))
6733 case GIMPLE_PHI:
6734 tgt = PHI_RESULT (use->stmt);
6736 /* If we should keep the biv, do not replace it. */
6737 if (name_info (data, tgt)->preserve_biv)
6738 return;
6740 bsi = gsi_after_labels (gimple_bb (use->stmt));
6741 break;
6743 case GIMPLE_ASSIGN:
6744 tgt = gimple_assign_lhs (use->stmt);
6745 bsi = gsi_for_stmt (use->stmt);
6746 break;
6748 default:
6749 gcc_unreachable ();
6752 if (!valid_gimple_rhs_p (comp)
6753 || (gimple_code (use->stmt) != GIMPLE_PHI
6754 /* We can't allow re-allocating the stmt as it might be pointed
6755 to still. */
6756 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6757 >= gimple_num_ops (gsi_stmt (bsi)))))
6759 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6760 true, GSI_SAME_STMT);
6761 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6763 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6764 /* As this isn't a plain copy we have to reset alignment
6765 information. */
6766 if (SSA_NAME_PTR_INFO (comp))
6767 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6771 if (gimple_code (use->stmt) == GIMPLE_PHI)
6773 ass = gimple_build_assign (tgt, comp);
6774 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6776 bsi = gsi_for_stmt (use->stmt);
6777 remove_phi_node (&bsi, false);
6779 else
6781 gimple_assign_set_rhs_from_tree (&bsi, comp);
6782 use->stmt = gsi_stmt (bsi);
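/* A hedged sketch of the common case, with hypothetical SSA names:
   suppose the loop counter has the phi i_2 = PHI <0(preheader),
   i_3(latch)> and the selected candidate is ivtmp_7 with base 0 and
   step 4. get_computation then yields roughly ivtmp_7 / 4, the phi is
   removed, and an assignment

     i_2 = ivtmp_7 / 4;

   is inserted after the labels of the block (the GIMPLE_PHI branch
   above); for a GIMPLE_ASSIGN use only the right-hand side is
   replaced. */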
6786 /* Performs a peephole optimization to reorder the iv update statement with
6787 a mem ref to enable instruction combining in later phases. The mem ref uses
6788 the iv value before the update, so the reordering transformation requires
6789 adjustment of the offset. CAND is the selected IV_CAND.
6791 Example:
6793 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6794 iv2 = iv1 + 1;
6796 if (t < val) (1)
6797 goto L;
6798 goto Head;
6801 directly propagating t over to (1) would introduce an overlapping live range,
6802 thus increasing register pressure. This peephole transforms it into:
6805 iv2 = iv1 + 1;
6806 t = MEM_REF (base, iv2, 8, 8);
6807 if (t < val)
6808 goto L;
6809 goto Head;
6812 static void
6813 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6815 tree var_after;
6816 gimple iv_update, stmt;
6817 basic_block bb;
6818 gimple_stmt_iterator gsi, gsi_iv;
6820 if (cand->pos != IP_NORMAL)
6821 return;
6823 var_after = cand->var_after;
6824 iv_update = SSA_NAME_DEF_STMT (var_after);
6826 bb = gimple_bb (iv_update);
6827 gsi = gsi_last_nondebug_bb (bb);
6828 stmt = gsi_stmt (gsi);
6830 /* Only handle a conditional statement for now. */
6831 if (gimple_code (stmt) != GIMPLE_COND)
6832 return;
6834 gsi_prev_nondebug (&gsi);
6835 stmt = gsi_stmt (gsi);
6836 if (stmt != iv_update)
6837 return;
6839 gsi_prev_nondebug (&gsi);
6840 if (gsi_end_p (gsi))
6841 return;
6843 stmt = gsi_stmt (gsi);
6844 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6845 return;
6847 if (stmt != use->stmt)
6848 return;
6850 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6851 return;
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6855 fprintf (dump_file, "Reordering \n");
6856 print_gimple_stmt (dump_file, iv_update, 0, 0);
6857 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6858 fprintf (dump_file, "\n");
6861 gsi = gsi_for_stmt (use->stmt);
6862 gsi_iv = gsi_for_stmt (iv_update);
6863 gsi_move_before (&gsi_iv, &gsi);
6865 cand->pos = IP_BEFORE_USE;
6866 cand->incremented_at = use->stmt;
6869 /* Rewrites USE (address that is an iv) using candidate CAND. */
6871 static void
6872 rewrite_use_address_1 (struct ivopts_data *data,
6873 struct iv_use *use, struct iv_cand *cand)
6875 aff_tree aff;
6876 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6877 tree base_hint = NULL_TREE;
6878 tree ref, iv;
6879 bool ok;
6881 adjust_iv_update_pos (cand, use);
6882 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6883 gcc_assert (ok);
6884 unshare_aff_combination (&aff);
6886 /* To avoid undefined overflow problems, all IV candidates use unsigned
6887 integer types. The drawback is that this makes it impossible for
6888 create_mem_ref to distinguish an IV that is based on a memory object
6889 from one that represents simply an offset.
6891 To work around this problem, we pass a hint to create_mem_ref that
6892 indicates which variable (if any) in aff is an IV based on a memory
6893 object. Note that we only consider the candidate. If this is not
6894 based on an object, the base of the reference is in some subexpression
6895 of the use -- but these will use pointer types, so they are recognized
6896 by the create_mem_ref heuristics anyway. */
6897 if (cand->iv->base_object)
6898 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6900 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6901 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6902 reference_alias_ptr_type (*use->op_p),
6903 iv, base_hint, data->speed);
6904 copy_ref_info (ref, *use->op_p);
6905 *use->op_p = ref;
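/* A hedged sketch with hypothetical names: for a use of a[i_2] rewritten
   against an unsigned byte-stepping candidate ivtmp_3, the affine
   combination passed to create_mem_ref is roughly &a + ivtmp_3, and the
   emitted reference is a TARGET_MEM_REF along the lines of

     MEM[symbol: a, index: ivtmp_3]

   where the BASE_HINT computed above lets create_mem_ref recognize &a as
   the base object despite the candidate's unsigned type. */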
6908 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6909 first use of a group, rewrites sub uses in the group too. */
6911 static void
6912 rewrite_use_address (struct ivopts_data *data,
6913 struct iv_use *use, struct iv_cand *cand)
6915 struct iv_use *next;
6917 gcc_assert (use->sub_id == 0);
6918 rewrite_use_address_1 (data, use, cand);
6919 update_stmt (use->stmt);
6921 for (next = use->next; next != NULL; next = next->next)
6923 rewrite_use_address_1 (data, next, cand);
6924 update_stmt (next->stmt);
6927 return;
6930 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6931 candidate CAND. */
6933 static void
6934 rewrite_use_compare (struct ivopts_data *data,
6935 struct iv_use *use, struct iv_cand *cand)
6937 tree comp, *var_p, op, bound;
6938 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6939 enum tree_code compare;
6940 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6941 bool ok;
6943 bound = cp->value;
6944 if (bound)
6946 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6947 tree var_type = TREE_TYPE (var);
6948 gimple_seq stmts;
6950 if (dump_file && (dump_flags & TDF_DETAILS))
6952 fprintf (dump_file, "Replacing exit test: ");
6953 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6955 compare = cp->comp;
6956 bound = unshare_expr (fold_convert (var_type, bound));
6957 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6958 if (stmts)
6959 gsi_insert_seq_on_edge_immediate (
6960 loop_preheader_edge (data->current_loop),
6961 stmts);
6963 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6964 gimple_cond_set_lhs (cond_stmt, var);
6965 gimple_cond_set_code (cond_stmt, compare);
6966 gimple_cond_set_rhs (cond_stmt, op);
6967 return;
6970 /* The induction variable elimination failed; just express the original
6971 giv. */
6972 comp = get_computation (data->current_loop, use, cand);
6973 gcc_assert (comp != NULL_TREE);
6975 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6976 gcc_assert (ok);
6978 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6979 true, GSI_SAME_STMT);
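/* A hedged sketch of a successful elimination, with hypothetical names:
   for an exit test if (i_2 < n_5) where the selected candidate is a
   pointer iv p_7 walking a 4-byte array a, the cost pair may record
   bound = &a + n_5 * 4 and comp = NE_EXPR; the code above then computes
   the bound once in the preheader (say into _9) and rewrites the test to

     if (p_7 != _9)

   Only when no bound was recorded does the fallback re-express the
   compared value via get_computation. */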
6982 /* Rewrites USE using candidate CAND. */
6984 static void
6985 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6987 switch (use->type)
6989 case USE_NONLINEAR_EXPR:
6990 rewrite_use_nonlinear_expr (data, use, cand);
6991 break;
6993 case USE_ADDRESS:
6994 rewrite_use_address (data, use, cand);
6995 break;
6997 case USE_COMPARE:
6998 rewrite_use_compare (data, use, cand);
6999 break;
7001 default:
7002 gcc_unreachable ();
7005 update_stmt (use->stmt);
7008 /* Rewrite the uses using the selected induction variables. */
7010 static void
7011 rewrite_uses (struct ivopts_data *data)
7013 unsigned i;
7014 struct iv_cand *cand;
7015 struct iv_use *use;
7017 for (i = 0; i < n_iv_uses (data); i++)
7019 use = iv_use (data, i);
7020 cand = use->selected;
7021 gcc_assert (cand);
7023 rewrite_use (data, use, cand);
7027 /* Removes the ivs that are not used after rewriting. */
7029 static void
7030 remove_unused_ivs (struct ivopts_data *data)
7032 unsigned j;
7033 bitmap_iterator bi;
7034 bitmap toremove = BITMAP_ALLOC (NULL);
7036 /* Figure out an order in which to release SSA DEFs so that we don't
7037 release something that we'd have to propagate into a debug stmt
7038 afterwards. */
7039 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7041 struct version_info *info;
7043 info = ver_info (data, j);
7044 if (info->iv
7045 && !integer_zerop (info->iv->step)
7046 && !info->inv_id
7047 && !info->iv->have_use_for
7048 && !info->preserve_biv)
7050 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7052 tree def = info->iv->ssa_name;
7054 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7056 imm_use_iterator imm_iter;
7057 use_operand_p use_p;
7058 gimple stmt;
7059 int count = 0;
7061 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7063 if (!gimple_debug_bind_p (stmt))
7064 continue;
7066 /* We just want to determine whether to do nothing
7067 (count == 0), to substitute the computed
7068 expression into a single use of the SSA DEF by
7069 itself (count == 1), or to use a debug temp
7070 because the SSA DEF is used multiple times or as
7071 part of a larger expression (count > 1). */
7072 count++;
7073 if (gimple_debug_bind_get_value (stmt) != def)
7074 count++;
7076 if (count > 1)
7077 BREAK_FROM_IMM_USE_STMT (imm_iter);
7080 if (!count)
7081 continue;
7083 struct iv_use dummy_use;
7084 struct iv_cand *best_cand = NULL, *cand;
7085 unsigned i, best_pref = 0, cand_pref;
7087 memset (&dummy_use, 0, sizeof (dummy_use));
7088 dummy_use.iv = info->iv;
7089 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7091 cand = iv_use (data, i)->selected;
7092 if (cand == best_cand)
7093 continue;
7094 cand_pref = operand_equal_p (cand->iv->step,
7095 info->iv->step, 0)
7096 ? 4 : 0;
7097 cand_pref
7098 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7099 == TYPE_MODE (TREE_TYPE (info->iv->base))
7100 ? 2 : 0;
7101 cand_pref
7102 += TREE_CODE (cand->iv->base) == INTEGER_CST
7103 ? 1 : 0;
7104 if (best_cand == NULL || best_pref < cand_pref)
7106 best_cand = cand;
7107 best_pref = cand_pref;
7111 if (!best_cand)
7112 continue;
7114 tree comp = get_computation_at (data->current_loop,
7115 &dummy_use, best_cand,
7116 SSA_NAME_DEF_STMT (def));
7117 if (!comp)
7118 continue;
7120 if (count > 1)
7122 tree vexpr = make_node (DEBUG_EXPR_DECL);
7123 DECL_ARTIFICIAL (vexpr) = 1;
7124 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7125 if (SSA_NAME_VAR (def))
7126 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7127 else
7128 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7129 gdebug *def_temp
7130 = gimple_build_debug_bind (vexpr, comp, NULL);
7131 gimple_stmt_iterator gsi;
7133 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7134 gsi = gsi_after_labels (gimple_bb
7135 (SSA_NAME_DEF_STMT (def)));
7136 else
7137 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7139 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7140 comp = vexpr;
7143 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7145 if (!gimple_debug_bind_p (stmt))
7146 continue;
7148 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7149 SET_USE (use_p, comp);
7151 update_stmt (stmt);
7157 release_defs_bitset (toremove);
7159 BITMAP_FREE (toremove);
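/* A hedged sketch of the debug handling above, with hypothetical names:
   if a removed counter i_2 (base 0, step 1) has several debug uses and
   the best surviving candidate is ivtmp_7 with step 4, the pass emits

     # DEBUG D#1 => ivtmp_7 / 4

   and redirects every debug binding of i from i_2 to D#1, so the
   variable remains printable in the debugger after the ssa name is
   released. */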
7162 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7163 for hash_map::traverse. */
7165 bool
7166 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7168 free (value);
7169 return true;
7172 /* Frees data allocated by the optimization of a single loop. */
7174 static void
7175 free_loop_data (struct ivopts_data *data)
7177 unsigned i, j;
7178 bitmap_iterator bi;
7179 tree obj;
7181 if (data->niters)
7183 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7184 delete data->niters;
7185 data->niters = NULL;
7188 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7190 struct version_info *info;
7192 info = ver_info (data, i);
7193 free (info->iv);
7194 info->iv = NULL;
7195 info->has_nonlin_use = false;
7196 info->preserve_biv = false;
7197 info->inv_id = 0;
7199 bitmap_clear (data->relevant);
7200 bitmap_clear (data->important_candidates);
7202 for (i = 0; i < n_iv_uses (data); i++)
7204 struct iv_use *use = iv_use (data, i);
7205 struct iv_use *pre = use, *sub = use->next;
7207 while (sub)
7209 gcc_assert (sub->related_cands == NULL);
7210 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7212 free (sub->iv);
7213 pre = sub;
7214 sub = sub->next;
7215 free (pre);
7218 free (use->iv);
7219 BITMAP_FREE (use->related_cands);
7220 for (j = 0; j < use->n_map_members; j++)
7221 if (use->cost_map[j].depends_on)
7222 BITMAP_FREE (use->cost_map[j].depends_on);
7223 free (use->cost_map);
7224 free (use);
7226 data->iv_uses.truncate (0);
7228 for (i = 0; i < n_iv_cands (data); i++)
7230 struct iv_cand *cand = iv_cand (data, i);
7232 free (cand->iv);
7233 if (cand->depends_on)
7234 BITMAP_FREE (cand->depends_on);
7235 free (cand);
7237 data->iv_candidates.truncate (0);
7239 if (data->version_info_size < num_ssa_names)
7241 data->version_info_size = 2 * num_ssa_names;
7242 free (data->version_info);
7243 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7246 data->max_inv_id = 0;
7248 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7249 SET_DECL_RTL (obj, NULL_RTX);
7251 decl_rtl_to_reset.truncate (0);
7253 data->inv_expr_tab->empty ();
7254 data->inv_expr_id = 0;
7257 /* Finalizes the data structures used by the iv optimization pass. */
7260 static void
7261 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7263 free_loop_data (data);
7264 free (data->version_info);
7265 BITMAP_FREE (data->relevant);
7266 BITMAP_FREE (data->important_candidates);
7268 decl_rtl_to_reset.release ();
7269 data->iv_uses.release ();
7270 data->iv_candidates.release ();
7271 delete data->inv_expr_tab;
7272 data->inv_expr_tab = NULL;
7273 free_affine_expand_cache (&data->name_expansion_cache);
7276 /* Returns true if the loop body BODY includes any function calls. */
7278 static bool
7279 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7281 gimple_stmt_iterator gsi;
7282 unsigned i;
7284 for (i = 0; i < num_nodes; i++)
7285 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7287 gimple stmt = gsi_stmt (gsi);
7288 if (is_gimple_call (stmt)
7289 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7290 return true;
7292 return false;
7295 /* Optimizes the LOOP. Returns true if anything changed. */
7297 static bool
7298 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7300 bool changed = false;
7301 struct iv_ca *iv_ca;
7302 edge exit = single_dom_exit (loop);
7303 basic_block *body;
7305 gcc_assert (!data->niters);
7306 data->current_loop = loop;
7307 data->loop_loc = find_loop_location (loop);
7308 data->speed = optimize_loop_for_speed_p (loop);
7310 if (dump_file && (dump_flags & TDF_DETAILS))
7312 fprintf (dump_file, "Processing loop %d", loop->num);
7313 if (data->loop_loc != UNKNOWN_LOCATION)
7314 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7315 LOCATION_LINE (data->loop_loc));
7316 fprintf (dump_file, "\n");
7318 if (exit)
7320 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7321 exit->src->index, exit->dest->index);
7322 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7323 fprintf (dump_file, "\n");
7326 fprintf (dump_file, "\n");
7329 body = get_loop_body (loop);
7330 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7331 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7332 free (body);
7334 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7336 /* For each ssa name, determine whether it behaves as an induction variable
7337 in some loop. */
7338 if (!find_induction_variables (data))
7339 goto finish;
7341 /* Finds interesting uses (item 1). */
7342 find_interesting_uses (data);
7343 group_address_uses (data);
7344 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7345 goto finish;
7347 /* Finds candidates for the induction variables (item 2). */
7348 find_iv_candidates (data);
7350 /* Calculates the costs (item 3, part 1). */
7351 determine_iv_costs (data);
7352 determine_use_iv_costs (data);
7353 determine_set_costs (data);
7355 /* Find the optimal set of induction variables (item 3, part 2). */
7356 iv_ca = find_optimal_iv_set (data);
7357 if (!iv_ca)
7358 goto finish;
7359 changed = true;
7361 /* Create the new induction variables (item 4, part 1). */
7362 create_new_ivs (data, iv_ca);
7363 iv_ca_free (&iv_ca);
7365 /* Rewrite the uses (item 4, part 2). */
7366 rewrite_uses (data);
7368 /* Remove the ivs that are unused after rewriting. */
7369 remove_unused_ivs (data);
7371 /* We have changed the structure of induction variables; it might happen
7372 that definitions in the scev database refer to some of them that were
7373 eliminated. */
7374 scev_reset ();
7376 finish:
7377 free_loop_data (data);
7379 return changed;
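/* An illustrative before/after of the whole pipeline (hypothetical
   source, conceptual output): for

     void
     clear (int *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = 0;
     }

   the selected set is typically a single pointer iv, giving code
   equivalent to

     int *p = a, *end = a + n;
     for (; p != end; p++)
       *p = 0;

   i.e. the scaled address computation is strength-reduced and the exit
   test is rewritten against the new iv by rewrite_use_compare. */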
7382 /* Main entry point. Optimizes induction variables in loops. */
7384 void
7385 tree_ssa_iv_optimize (void)
7387 struct loop *loop;
7388 struct ivopts_data data;
7390 tree_ssa_iv_optimize_init (&data);
7392 /* Optimize the loops starting with the innermost ones. */
7393 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7395 if (dump_file && (dump_flags & TDF_DETAILS))
7396 flow_loop_dump (loop, dump_file, NULL, 1);
7398 tree_ssa_iv_optimize_loop (&data, loop);
7401 tree_ssa_iv_optimize_finalize (&data);