2015-06-29 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob1ce275b096b6b8d936b9851dadc030c1efdd6bf3
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "alias.h"
69 #include "symtab.h"
70 #include "tree.h"
71 #include "fold-const.h"
72 #include "stor-layout.h"
73 #include "tm_p.h"
74 #include "predict.h"
75 #include "hard-reg-set.h"
76 #include "function.h"
77 #include "dominance.h"
78 #include "cfg.h"
79 #include "basic-block.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa-alias.h"
82 #include "internal-fn.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "gimple-iterator.h"
88 #include "gimplify-me.h"
89 #include "gimple-ssa.h"
90 #include "cgraph.h"
91 #include "tree-cfg.h"
92 #include "tree-phinodes.h"
93 #include "ssa-iterators.h"
94 #include "stringpool.h"
95 #include "tree-ssanames.h"
96 #include "tree-ssa-loop-ivopts.h"
97 #include "tree-ssa-loop-manip.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
100 #include "rtl.h"
101 #include "flags.h"
102 #include "insn-config.h"
103 #include "expmed.h"
104 #include "dojump.h"
105 #include "explow.h"
106 #include "calls.h"
107 #include "emit-rtl.h"
108 #include "varasm.h"
109 #include "stmt.h"
110 #include "expr.h"
111 #include "tree-dfa.h"
112 #include "tree-ssa.h"
113 #include "cfgloop.h"
114 #include "tree-pass.h"
115 #include "tree-chrec.h"
116 #include "tree-scalar-evolution.h"
117 #include "params.h"
118 #include "langhooks.h"
119 #include "tree-affine.h"
120 #include "target.h"
121 #include "tree-inline.h"
122 #include "tree-ssa-propagate.h"
123 #include "tree-ssa-address.h"
124 #include "builtins.h"
125 #include "tree-vectorizer.h"
127 /* FIXME: Expressions are expanded to RTL in this pass to determine the
128 cost of different addressing modes. This should be moved to a TBD
129 interface between the GIMPLE and RTL worlds. */
130 #include "recog.h"
132 /* The infinite cost. */
133 #define INFTY 10000000
135 #define AVG_LOOP_NITER(LOOP) 5
137 /* Returns the expected number of loop iterations for LOOP.
138 The average trip count is computed from profile data if it
139 exists. */
141 static inline HOST_WIDE_INT
142 avg_loop_niter (struct loop *loop)
144 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
145 if (niter == -1)
146 return AVG_LOOP_NITER (loop);
148 return niter;
151 /* Representation of the induction variable. */
152 struct iv
154 tree base; /* Initial value of the iv. */
155 tree base_object; /* A memory object to that the induction variable points. */
156 tree step; /* Step of the iv (constant only). */
157 tree ssa_name; /* The ssa name with the value. */
158 unsigned use_id; /* The identifier in the use if it is the case. */
159 bool biv_p; /* Is it a biv? */
160 bool have_use_for; /* Do we already have a use for it? */
161 bool no_overflow; /* True if the iv doesn't overflow. */
164 /* Per-ssa version information (induction variable descriptions, etc.). */
165 struct version_info
167 tree name; /* The ssa name. */
168 struct iv *iv; /* Induction variable description. */
169 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
170 an expression that is not an induction variable. */
171 bool preserve_biv; /* For the original biv, whether to preserve it. */
172 unsigned inv_id; /* Id of an invariant. */
175 /* Types of uses. */
176 enum use_type
178 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
179 USE_ADDRESS, /* Use in an address. */
180 USE_COMPARE /* Use is a compare. */
183 /* Cost of a computation. */
184 typedef struct
186 int cost; /* The runtime cost. */
187 unsigned complexity; /* The estimate of the complexity of the code for
188 the computation (in no concrete units --
189 complexity field should be larger for more
190 complex expressions and addressing modes). */
191 } comp_cost;
193 static const comp_cost no_cost = {0, 0};
194 static const comp_cost infinite_cost = {INFTY, INFTY};
196 /* The candidate - cost pair. */
197 struct cost_pair
199 struct iv_cand *cand; /* The candidate. */
200 comp_cost cost; /* The cost. */
201 bitmap depends_on; /* The list of invariants that have to be
202 preserved. */
203 tree value; /* For final value elimination, the expression for
204 the final value of the iv. For iv elimination,
205 the new bound to compare with. */
206 enum tree_code comp; /* For iv elimination, the comparison. */
207 int inv_expr_id; /* Loop invariant expression id. */
210 /* Use. */
211 struct iv_use
213 unsigned id; /* The id of the use. */
214 unsigned sub_id; /* The id of the sub use. */
215 enum use_type type; /* Type of the use. */
216 struct iv *iv; /* The induction variable it is based on. */
217 gimple stmt; /* Statement in that it occurs. */
218 tree *op_p; /* The place where it occurs. */
219 bitmap related_cands; /* The set of "related" iv candidates, plus the common
220 important ones. */
222 unsigned n_map_members; /* Number of candidates in the cost_map list. */
223 struct cost_pair *cost_map;
224 /* The costs wrto the iv candidates. */
226 struct iv_cand *selected;
227 /* The selected candidate. */
229 struct iv_use *next; /* The next sub use. */
230 tree addr_base; /* Base address with const offset stripped. */
231 unsigned HOST_WIDE_INT addr_offset;
232 /* Const offset stripped from base address. */
235 /* The position where the iv is computed. */
236 enum iv_position
238 IP_NORMAL, /* At the end, just before the exit condition. */
239 IP_END, /* At the end of the latch block. */
240 IP_BEFORE_USE, /* Immediately before a specific use. */
241 IP_AFTER_USE, /* Immediately after a specific use. */
242 IP_ORIGINAL /* The original biv. */
245 /* The induction variable candidate. */
246 struct iv_cand
248 unsigned id; /* The number of the candidate. */
249 bool important; /* Whether this is an "important" candidate, i.e. such
250 that it should be considered by all uses. */
251 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
252 gimple incremented_at;/* For original biv, the statement where it is
253 incremented. */
254 tree var_before; /* The variable used for it before increment. */
255 tree var_after; /* The variable used for it after increment. */
256 struct iv *iv; /* The value of the candidate. NULL for
257 "pseudocandidate" used to indicate the possibility
258 to replace the final value of an iv by direct
259 computation of the value. */
260 unsigned cost; /* Cost of the candidate. */
261 unsigned cost_step; /* Cost of the candidate's increment operation. */
262 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
263 where it is incremented. */
264 bitmap depends_on; /* The list of invariants that are used in step of the
265 biv. */
268 /* Loop invariant expression hashtable entry. */
269 struct iv_inv_expr_ent
271 tree expr;
272 int id;
273 hashval_t hash;
276 /* The data used by the induction variable optimizations. */
278 typedef struct iv_use *iv_use_p;
280 typedef struct iv_cand *iv_cand_p;
282 /* Hashtable helpers. */
284 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
286 static inline hashval_t hash (const iv_inv_expr_ent *);
287 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
290 /* Hash function for loop invariant expressions. */
292 inline hashval_t
293 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
295 return expr->hash;
298 /* Hash table equality function for expressions. */
300 inline bool
301 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
302 const iv_inv_expr_ent *expr2)
304 return expr1->hash == expr2->hash
305 && operand_equal_p (expr1->expr, expr2->expr, 0);
308 struct ivopts_data
310 /* The currently optimized loop. */
311 struct loop *current_loop;
312 source_location loop_loc;
314 /* Numbers of iterations for all exits of the current loop. */
315 hash_map<edge, tree_niter_desc *> *niters;
317 /* Number of registers used in it. */
318 unsigned regs_used;
320 /* The size of version_info array allocated. */
321 unsigned version_info_size;
323 /* The array of information for the ssa names. */
324 struct version_info *version_info;
326 /* The hashtable of loop invariant expressions created
327 by ivopt. */
328 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
330 /* Loop invariant expression id. */
331 int inv_expr_id;
333 /* The bitmap of indices in version_info whose value was changed. */
334 bitmap relevant;
336 /* The uses of induction variables. */
337 vec<iv_use_p> iv_uses;
339 /* The candidates. */
340 vec<iv_cand_p> iv_candidates;
342 /* A bitmap of important candidates. */
343 bitmap important_candidates;
345 /* Cache used by tree_to_aff_combination_expand. */
346 hash_map<tree, name_expansion *> *name_expansion_cache;
348 /* The maximum invariant id. */
349 unsigned max_inv_id;
351 /* Whether to consider just related and important candidates when replacing a
352 use. */
353 bool consider_all_candidates;
355 /* Are we optimizing for speed? */
356 bool speed;
358 /* Whether the loop body includes any function calls. */
359 bool body_includes_call;
361 /* Whether the loop body can only be exited via single exit. */
362 bool loop_single_exit_p;
365 /* An assignment of iv candidates to uses. */
367 struct iv_ca
369 /* The number of uses covered by the assignment. */
370 unsigned upto;
372 /* Number of uses that cannot be expressed by the candidates in the set. */
373 unsigned bad_uses;
375 /* Candidate assigned to a use, together with the related costs. */
376 struct cost_pair **cand_for_use;
378 /* Number of times each candidate is used. */
379 unsigned *n_cand_uses;
381 /* The candidates used. */
382 bitmap cands;
384 /* The number of candidates in the set. */
385 unsigned n_cands;
387 /* Total number of registers needed. */
388 unsigned n_regs;
390 /* Total cost of expressing uses. */
391 comp_cost cand_use_cost;
393 /* Total cost of candidates. */
394 unsigned cand_cost;
396 /* Number of times each invariant is used. */
397 unsigned *n_invariant_uses;
399 /* The array holding the number of uses of each loop
400 invariant expressions created by ivopt. */
401 unsigned *used_inv_expr;
403 /* The number of created loop invariants. */
404 unsigned num_used_inv_expr;
406 /* Total cost of the assignment. */
407 comp_cost cost;
410 /* Difference of two iv candidate assignments. */
412 struct iv_ca_delta
414 /* Changed use. */
415 struct iv_use *use;
417 /* An old assignment (for rollback purposes). */
418 struct cost_pair *old_cp;
420 /* A new assignment. */
421 struct cost_pair *new_cp;
423 /* Next change in the list. */
424 struct iv_ca_delta *next_change;
427 /* Bound on number of candidates below that all candidates are considered. */
429 #define CONSIDER_ALL_CANDIDATES_BOUND \
430 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
432 /* If there are more iv occurrences, we just give up (it is quite unlikely that
433 optimizing such a loop would help, and it would take ages). */
435 #define MAX_CONSIDERED_USES \
436 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
438 /* If there are at most this number of ivs in the set, try removing unnecessary
439 ivs from the set always. */
441 #define ALWAYS_PRUNE_CAND_SET_BOUND \
442 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
444 /* The list of trees for that the decl_rtl field must be reset is stored
445 here. */
447 static vec<tree> decl_rtl_to_reset;
449 static comp_cost force_expr_to_var_cost (tree, bool);
451 /* Number of uses recorded in DATA. */
453 static inline unsigned
454 n_iv_uses (struct ivopts_data *data)
456 return data->iv_uses.length ();
459 /* Ith use recorded in DATA. */
461 static inline struct iv_use *
462 iv_use (struct ivopts_data *data, unsigned i)
464 return data->iv_uses[i];
467 /* Number of candidates recorded in DATA. */
469 static inline unsigned
470 n_iv_cands (struct ivopts_data *data)
472 return data->iv_candidates.length ();
475 /* Ith candidate recorded in DATA. */
477 static inline struct iv_cand *
478 iv_cand (struct ivopts_data *data, unsigned i)
480 return data->iv_candidates[i];
483 /* The single loop exit if it dominates the latch, NULL otherwise. */
485 edge
486 single_dom_exit (struct loop *loop)
488 edge exit = single_exit (loop);
490 if (!exit)
491 return NULL;
493 if (!just_once_each_iteration_p (loop, exit->src))
494 return NULL;
496 return exit;
499 /* Dumps information about the induction variable IV to FILE. */
501 void
502 dump_iv (FILE *file, struct iv *iv, bool dump_name)
504 if (iv->ssa_name && dump_name)
506 fprintf (file, "ssa name ");
507 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
508 fprintf (file, "\n");
511 fprintf (file, " type ");
512 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
513 fprintf (file, "\n");
515 if (iv->step)
517 fprintf (file, " base ");
518 print_generic_expr (file, iv->base, TDF_SLIM);
519 fprintf (file, "\n");
521 fprintf (file, " step ");
522 print_generic_expr (file, iv->step, TDF_SLIM);
523 fprintf (file, "\n");
525 else
527 fprintf (file, " invariant ");
528 print_generic_expr (file, iv->base, TDF_SLIM);
529 fprintf (file, "\n");
532 if (iv->base_object)
534 fprintf (file, " base object ");
535 print_generic_expr (file, iv->base_object, TDF_SLIM);
536 fprintf (file, "\n");
539 if (iv->biv_p)
540 fprintf (file, " is a biv\n");
543 /* Dumps information about the USE to FILE. */
545 void
546 dump_use (FILE *file, struct iv_use *use)
548 fprintf (file, "use %d", use->id);
549 if (use->sub_id)
550 fprintf (file, ".%d", use->sub_id);
552 fprintf (file, "\n");
554 switch (use->type)
556 case USE_NONLINEAR_EXPR:
557 fprintf (file, " generic\n");
558 break;
560 case USE_ADDRESS:
561 fprintf (file, " address\n");
562 break;
564 case USE_COMPARE:
565 fprintf (file, " compare\n");
566 break;
568 default:
569 gcc_unreachable ();
572 fprintf (file, " in statement ");
573 print_gimple_stmt (file, use->stmt, 0, 0);
574 fprintf (file, "\n");
576 fprintf (file, " at position ");
577 if (use->op_p)
578 print_generic_expr (file, *use->op_p, TDF_SLIM);
579 fprintf (file, "\n");
581 dump_iv (file, use->iv, false);
583 if (use->related_cands)
585 fprintf (file, " related candidates ");
586 dump_bitmap (file, use->related_cands);
590 /* Dumps information about the uses to FILE. */
592 void
593 dump_uses (FILE *file, struct ivopts_data *data)
595 unsigned i;
596 struct iv_use *use;
598 for (i = 0; i < n_iv_uses (data); i++)
600 use = iv_use (data, i);
603 dump_use (file, use);
604 use = use->next;
606 while (use);
607 fprintf (file, "\n");
611 /* Dumps information about induction variable candidate CAND to FILE. */
613 void
614 dump_cand (FILE *file, struct iv_cand *cand)
616 struct iv *iv = cand->iv;
618 fprintf (file, "candidate %d%s\n",
619 cand->id, cand->important ? " (important)" : "");
621 if (cand->depends_on)
623 fprintf (file, " depends on ");
624 dump_bitmap (file, cand->depends_on);
627 if (!iv)
629 fprintf (file, " final value replacement\n");
630 return;
633 if (cand->var_before)
635 fprintf (file, " var_before ");
636 print_generic_expr (file, cand->var_before, TDF_SLIM);
637 fprintf (file, "\n");
639 if (cand->var_after)
641 fprintf (file, " var_after ");
642 print_generic_expr (file, cand->var_after, TDF_SLIM);
643 fprintf (file, "\n");
646 switch (cand->pos)
648 case IP_NORMAL:
649 fprintf (file, " incremented before exit test\n");
650 break;
652 case IP_BEFORE_USE:
653 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
654 break;
656 case IP_AFTER_USE:
657 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
658 break;
660 case IP_END:
661 fprintf (file, " incremented at end\n");
662 break;
664 case IP_ORIGINAL:
665 fprintf (file, " original biv\n");
666 break;
669 dump_iv (file, iv, false);
672 /* Returns the info for ssa version VER. */
674 static inline struct version_info *
675 ver_info (struct ivopts_data *data, unsigned ver)
677 return data->version_info + ver;
680 /* Returns the info for ssa name NAME. */
682 static inline struct version_info *
683 name_info (struct ivopts_data *data, tree name)
685 return ver_info (data, SSA_NAME_VERSION (name));
688 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
689 emitted in LOOP. */
691 static bool
692 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
694 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
696 gcc_assert (bb);
698 if (sbb == loop->latch)
699 return true;
701 if (sbb != bb)
702 return false;
704 return stmt == last_stmt (bb);
707 /* Returns true if STMT if after the place where the original induction
708 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
709 if the positions are identical. */
711 static bool
712 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
714 basic_block cand_bb = gimple_bb (cand->incremented_at);
715 basic_block stmt_bb = gimple_bb (stmt);
717 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
718 return false;
720 if (stmt_bb != cand_bb)
721 return true;
723 if (true_if_equal
724 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
725 return true;
726 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
729 /* Returns true if STMT if after the place where the induction variable
730 CAND is incremented in LOOP. */
732 static bool
733 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
735 switch (cand->pos)
737 case IP_END:
738 return false;
740 case IP_NORMAL:
741 return stmt_after_ip_normal_pos (loop, stmt);
743 case IP_ORIGINAL:
744 case IP_AFTER_USE:
745 return stmt_after_inc_pos (cand, stmt, false);
747 case IP_BEFORE_USE:
748 return stmt_after_inc_pos (cand, stmt, true);
750 default:
751 gcc_unreachable ();
755 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
757 static bool
758 abnormal_ssa_name_p (tree exp)
760 if (!exp)
761 return false;
763 if (TREE_CODE (exp) != SSA_NAME)
764 return false;
766 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
769 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
770 abnormal phi node. Callback for for_each_index. */
772 static bool
773 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
774 void *data ATTRIBUTE_UNUSED)
776 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
778 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
779 return false;
780 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
781 return false;
784 return !abnormal_ssa_name_p (*index);
787 /* Returns true if EXPR contains a ssa name that occurs in an
788 abnormal phi node. */
790 bool
791 contains_abnormal_ssa_name_p (tree expr)
793 enum tree_code code;
794 enum tree_code_class codeclass;
796 if (!expr)
797 return false;
799 code = TREE_CODE (expr);
800 codeclass = TREE_CODE_CLASS (code);
802 if (code == SSA_NAME)
803 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
805 if (code == INTEGER_CST
806 || is_gimple_min_invariant (expr))
807 return false;
809 if (code == ADDR_EXPR)
810 return !for_each_index (&TREE_OPERAND (expr, 0),
811 idx_contains_abnormal_ssa_name_p,
812 NULL);
814 if (code == COND_EXPR)
815 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
816 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
819 switch (codeclass)
821 case tcc_binary:
822 case tcc_comparison:
823 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
824 return true;
826 /* Fallthru. */
827 case tcc_unary:
828 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
829 return true;
831 break;
833 default:
834 gcc_unreachable ();
837 return false;
840 /* Returns the structure describing number of iterations determined from
841 EXIT of DATA->current_loop, or NULL if something goes wrong. */
843 static struct tree_niter_desc *
844 niter_for_exit (struct ivopts_data *data, edge exit)
846 struct tree_niter_desc *desc;
847 tree_niter_desc **slot;
849 if (!data->niters)
851 data->niters = new hash_map<edge, tree_niter_desc *>;
852 slot = NULL;
854 else
855 slot = data->niters->get (exit);
857 if (!slot)
859 /* Try to determine number of iterations. We cannot safely work with ssa
860 names that appear in phi nodes on abnormal edges, so that we do not
861 create overlapping life ranges for them (PR 27283). */
862 desc = XNEW (struct tree_niter_desc);
863 if (!number_of_iterations_exit (data->current_loop,
864 exit, desc, true)
865 || contains_abnormal_ssa_name_p (desc->niter))
867 XDELETE (desc);
868 desc = NULL;
870 data->niters->put (exit, desc);
872 else
873 desc = *slot;
875 return desc;
878 /* Returns the structure describing number of iterations determined from
879 single dominating exit of DATA->current_loop, or NULL if something
880 goes wrong. */
882 static struct tree_niter_desc *
883 niter_for_single_dom_exit (struct ivopts_data *data)
885 edge exit = single_dom_exit (data->current_loop);
887 if (!exit)
888 return NULL;
890 return niter_for_exit (data, exit);
893 /* Initializes data structures used by the iv optimization pass, stored
894 in DATA. */
896 static void
897 tree_ssa_iv_optimize_init (struct ivopts_data *data)
899 data->version_info_size = 2 * num_ssa_names;
900 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
901 data->relevant = BITMAP_ALLOC (NULL);
902 data->important_candidates = BITMAP_ALLOC (NULL);
903 data->max_inv_id = 0;
904 data->niters = NULL;
905 data->iv_uses.create (20);
906 data->iv_candidates.create (20);
907 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
908 data->inv_expr_id = 0;
909 data->name_expansion_cache = NULL;
910 decl_rtl_to_reset.create (20);
913 /* Returns a memory object to that EXPR points. In case we are able to
914 determine that it does not point to any such object, NULL is returned. */
916 static tree
917 determine_base_object (tree expr)
919 enum tree_code code = TREE_CODE (expr);
920 tree base, obj;
922 /* If this is a pointer casted to any type, we need to determine
923 the base object for the pointer; so handle conversions before
924 throwing away non-pointer expressions. */
925 if (CONVERT_EXPR_P (expr))
926 return determine_base_object (TREE_OPERAND (expr, 0));
928 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
929 return NULL_TREE;
931 switch (code)
933 case INTEGER_CST:
934 return NULL_TREE;
936 case ADDR_EXPR:
937 obj = TREE_OPERAND (expr, 0);
938 base = get_base_address (obj);
940 if (!base)
941 return expr;
943 if (TREE_CODE (base) == MEM_REF)
944 return determine_base_object (TREE_OPERAND (base, 0));
946 return fold_convert (ptr_type_node,
947 build_fold_addr_expr (base));
949 case POINTER_PLUS_EXPR:
950 return determine_base_object (TREE_OPERAND (expr, 0));
952 case PLUS_EXPR:
953 case MINUS_EXPR:
954 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
955 gcc_unreachable ();
957 default:
958 return fold_convert (ptr_type_node, expr);
962 /* Return true if address expression with non-DECL_P operand appears
963 in EXPR. */
965 static bool
966 contain_complex_addr_expr (tree expr)
968 bool res = false;
970 STRIP_NOPS (expr);
971 switch (TREE_CODE (expr))
973 case POINTER_PLUS_EXPR:
974 case PLUS_EXPR:
975 case MINUS_EXPR:
976 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
977 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
978 break;
980 case ADDR_EXPR:
981 return (!DECL_P (TREE_OPERAND (expr, 0)));
983 default:
984 return false;
987 return res;
990 /* Allocates an induction variable with given initial value BASE and step STEP
991 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
993 static struct iv *
994 alloc_iv (tree base, tree step, bool no_overflow = false)
996 tree expr = base;
997 struct iv *iv = XCNEW (struct iv);
998 gcc_assert (step != NULL_TREE);
1000 /* Lower address expression in base except ones with DECL_P as operand.
1001 By doing this:
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1005 STRIP_NOPS (expr);
1006 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007 || contain_complex_addr_expr (expr))
1009 aff_tree comb;
1010 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1011 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1014 iv->base = base;
1015 iv->base_object = determine_base_object (base);
1016 iv->step = step;
1017 iv->biv_p = false;
1018 iv->have_use_for = false;
1019 iv->use_id = 0;
1020 iv->ssa_name = NULL_TREE;
1021 iv->no_overflow = no_overflow;
1023 return iv;
1026 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1027 doesn't overflow. */
1029 static void
1030 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1031 bool no_overflow)
1033 struct version_info *info = name_info (data, iv);
1035 gcc_assert (!info->iv);
1037 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1038 info->iv = alloc_iv (base, step, no_overflow);
1039 info->iv->ssa_name = iv;
1042 /* Finds induction variable declaration for VAR. */
1044 static struct iv *
1045 get_iv (struct ivopts_data *data, tree var)
1047 basic_block bb;
1048 tree type = TREE_TYPE (var);
1050 if (!POINTER_TYPE_P (type)
1051 && !INTEGRAL_TYPE_P (type))
1052 return NULL;
1054 if (!name_info (data, var)->iv)
1056 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1058 if (!bb
1059 || !flow_bb_inside_loop_p (data->current_loop, bb))
1060 set_iv (data, var, var, build_int_cst (type, 0), true);
1063 return name_info (data, var)->iv;
1066 /* Return the first non-invariant ssa var found in EXPR. */
1068 static tree
1069 extract_single_var_from_expr (tree expr)
1071 int i, n;
1072 tree tmp;
1073 enum tree_code code;
1075 if (!expr || is_gimple_min_invariant (expr))
1076 return NULL;
1078 code = TREE_CODE (expr);
1079 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1081 n = TREE_OPERAND_LENGTH (expr);
1082 for (i = 0; i < n; i++)
1084 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1086 if (tmp)
1087 return tmp;
1090 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1093 /* Finds basic ivs. */
1095 static bool
1096 find_bivs (struct ivopts_data *data)
1098 gphi *phi;
1099 affine_iv iv;
1100 tree step, type, base, stop;
1101 bool found = false;
1102 struct loop *loop = data->current_loop;
1103 gphi_iterator psi;
1105 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1107 phi = psi.phi ();
1109 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1110 continue;
1112 if (virtual_operand_p (PHI_RESULT (phi)))
1113 continue;
1115 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1116 continue;
1118 if (integer_zerop (iv.step))
1119 continue;
1121 step = iv.step;
1122 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1123 /* Stop expanding iv base at the first ssa var referred by iv step.
1124 Ideally we should stop at any ssa var, because that's expensive
1125 and unusual to happen, we just do it on the first one.
1127 See PR64705 for the rationale. */
1128 stop = extract_single_var_from_expr (step);
1129 base = expand_simple_operations (base, stop);
1130 if (contains_abnormal_ssa_name_p (base)
1131 || contains_abnormal_ssa_name_p (step))
1132 continue;
1134 type = TREE_TYPE (PHI_RESULT (phi));
1135 base = fold_convert (type, base);
1136 if (step)
1138 if (POINTER_TYPE_P (type))
1139 step = convert_to_ptrofftype (step);
1140 else
1141 step = fold_convert (type, step);
1144 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1145 found = true;
1148 return found;
1151 /* Marks basic ivs. */
1153 static void
1154 mark_bivs (struct ivopts_data *data)
1156 gphi *phi;
1157 gimple def;
1158 tree var;
1159 struct iv *iv, *incr_iv;
1160 struct loop *loop = data->current_loop;
1161 basic_block incr_bb;
1162 gphi_iterator psi;
1164 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1166 phi = psi.phi ();
1168 iv = get_iv (data, PHI_RESULT (phi));
1169 if (!iv)
1170 continue;
1172 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1173 def = SSA_NAME_DEF_STMT (var);
1174 /* Don't mark iv peeled from other one as biv. */
1175 if (def
1176 && gimple_code (def) == GIMPLE_PHI
1177 && gimple_bb (def) == loop->header)
1178 continue;
1180 incr_iv = get_iv (data, var);
1181 if (!incr_iv)
1182 continue;
1184 /* If the increment is in the subloop, ignore it. */
1185 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1186 if (incr_bb->loop_father != data->current_loop
1187 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1188 continue;
1190 iv->biv_p = true;
1191 incr_iv->biv_p = true;
1195 /* Checks whether STMT defines a linear induction variable and stores its
1196 parameters to IV. */
1198 static bool
1199 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1201 tree lhs, stop;
1202 struct loop *loop = data->current_loop;
1204 iv->base = NULL_TREE;
1205 iv->step = NULL_TREE;
1207 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1208 return false;
1210 lhs = gimple_assign_lhs (stmt);
1211 if (TREE_CODE (lhs) != SSA_NAME)
1212 return false;
1214 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1215 return false;
1217 /* Stop expanding iv base at the first ssa var referred by iv step.
1218 Ideally we should stop at any ssa var, because that's expensive
1219 and unusual to happen, we just do it on the first one.
1221 See PR64705 for the rationale. */
1222 stop = extract_single_var_from_expr (iv->step);
1223 iv->base = expand_simple_operations (iv->base, stop);
1224 if (contains_abnormal_ssa_name_p (iv->base)
1225 || contains_abnormal_ssa_name_p (iv->step))
1226 return false;
1228 /* If STMT could throw, then do not consider STMT as defining a GIV.
1229 While this will suppress optimizations, we can not safely delete this
1230 GIV and associated statements, even if it appears it is not used. */
1231 if (stmt_could_throw_p (stmt))
1232 return false;
1234 return true;
1237 /* Finds general ivs in statement STMT. */
1239 static void
1240 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1242 affine_iv iv;
1244 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1245 return;
1247 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1250 /* Finds general ivs in basic block BB. */
1252 static void
1253 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1255 gimple_stmt_iterator bsi;
1257 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1258 find_givs_in_stmt (data, gsi_stmt (bsi));
1261 /* Finds general ivs. */
1263 static void
1264 find_givs (struct ivopts_data *data)
1266 struct loop *loop = data->current_loop;
1267 basic_block *body = get_loop_body_in_dom_order (loop);
1268 unsigned i;
1270 for (i = 0; i < loop->num_nodes; i++)
1271 find_givs_in_bb (data, body[i]);
1272 free (body);
1275 /* For each ssa name defined in LOOP determines whether it is an induction
1276 variable and if so, its initial value and step. */
1278 static bool
1279 find_induction_variables (struct ivopts_data *data)
1281 unsigned i;
1282 bitmap_iterator bi;
1284 if (!find_bivs (data))
1285 return false;
1287 find_givs (data);
1288 mark_bivs (data);
1290 if (dump_file && (dump_flags & TDF_DETAILS))
1292 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1294 if (niter)
1296 fprintf (dump_file, " number of iterations ");
1297 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1298 if (!integer_zerop (niter->may_be_zero))
1300 fprintf (dump_file, "; zero if ");
1301 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1303 fprintf (dump_file, "\n\n");
1306 fprintf (dump_file, "Induction variables:\n\n");
1308 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1310 if (ver_info (data, i)->iv)
1311 dump_iv (dump_file, ver_info (data, i)->iv, true);
1315 return true;
1318 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1319 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1320 is the const offset stripped from IV base. For uses of other types,
1321 ADDR_BASE and ADDR_OFFSET are zero by default. */
1323 static struct iv_use *
1324 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1325 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1326 unsigned HOST_WIDE_INT addr_offset = 0)
1328 struct iv_use *use = XCNEW (struct iv_use);
1330 use->id = n_iv_uses (data);
1331 use->sub_id = 0;
1332 use->type = use_type;
1333 use->iv = iv;
1334 use->stmt = stmt;
1335 use->op_p = use_p;
1336 use->related_cands = BITMAP_ALLOC (NULL);
1337 use->next = NULL;
1338 use->addr_base = addr_base;
1339 use->addr_offset = addr_offset;
1341 data->iv_uses.safe_push (use);
1343 return use;
1346 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1347 The sub use is recorded under the one whose use id is ID_GROUP. */
1349 static struct iv_use *
1350 record_sub_use (struct ivopts_data *data, tree *use_p,
1351 struct iv *iv, gimple stmt, enum use_type use_type,
1352 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1353 unsigned int id_group)
1355 struct iv_use *use = XCNEW (struct iv_use);
1356 struct iv_use *group = iv_use (data, id_group);
1358 use->id = group->id;
1359 use->sub_id = 0;
1360 use->type = use_type;
1361 use->iv = iv;
1362 use->stmt = stmt;
1363 use->op_p = use_p;
1364 use->related_cands = NULL;
1365 use->addr_base = addr_base;
1366 use->addr_offset = addr_offset;
1368 /* Sub use list is maintained in offset ascending order. */
1369 if (addr_offset <= group->addr_offset)
1371 use->related_cands = group->related_cands;
1372 group->related_cands = NULL;
1373 use->next = group;
1374 data->iv_uses[id_group] = use;
1376 else
1378 struct iv_use *pre;
1381 pre = group;
1382 group = group->next;
1384 while (group && addr_offset > group->addr_offset);
1385 use->next = pre->next;
1386 pre->next = use;
1389 /* To avoid showing ssa name in the dumps, if it was not reset by the
1390 caller. */
1391 iv->ssa_name = NULL_TREE;
1393 return use;
1396 /* Checks whether OP is a loop-level invariant and if so, records it.
1397 NONLINEAR_USE is true if the invariant is used in a way we do not
1398 handle specially. */
1400 static void
1401 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1403 basic_block bb;
1404 struct version_info *info;
1406 if (TREE_CODE (op) != SSA_NAME
1407 || virtual_operand_p (op))
1408 return;
1410 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1411 if (bb
1412 && flow_bb_inside_loop_p (data->current_loop, bb))
1413 return;
1415 info = name_info (data, op);
1416 info->name = op;
1417 info->has_nonlin_use |= nonlinear_use;
1418 if (!info->inv_id)
1419 info->inv_id = ++data->max_inv_id;
1420 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 /* Checks whether the use OP is interesting and if so, records it. */
1425 static struct iv_use *
1426 find_interesting_uses_op (struct ivopts_data *data, tree op)
1428 struct iv *iv;
1429 struct iv *civ;
1430 gimple stmt;
1431 struct iv_use *use;
1433 if (TREE_CODE (op) != SSA_NAME)
1434 return NULL;
1436 iv = get_iv (data, op);
1437 if (!iv)
1438 return NULL;
1440 if (iv->have_use_for)
1442 use = iv_use (data, iv->use_id);
1444 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1445 return use;
1448 if (integer_zerop (iv->step))
1450 record_invariant (data, op, true);
1451 return NULL;
1453 iv->have_use_for = true;
1455 civ = XNEW (struct iv);
1456 *civ = *iv;
1458 stmt = SSA_NAME_DEF_STMT (op);
1459 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1460 || is_gimple_assign (stmt));
1462 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1463 iv->use_id = use->id;
1465 return use;
1468 /* Given a condition in statement STMT, checks whether it is a compare
1469 of an induction variable and an invariant. If this is the case,
1470 CONTROL_VAR is set to location of the iv, BOUND to the location of
1471 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1472 induction variable descriptions, and true is returned. If this is not
1473 the case, CONTROL_VAR and BOUND are set to the arguments of the
1474 condition and false is returned. */
1476 static bool
1477 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1478 tree **control_var, tree **bound,
1479 struct iv **iv_var, struct iv **iv_bound)
1481 /* The objects returned when COND has constant operands. */
1482 static struct iv const_iv;
1483 static tree zero;
1484 tree *op0 = &zero, *op1 = &zero;
1485 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1486 bool ret = false;
1488 if (gimple_code (stmt) == GIMPLE_COND)
1490 gcond *cond_stmt = as_a <gcond *> (stmt);
1491 op0 = gimple_cond_lhs_ptr (cond_stmt);
1492 op1 = gimple_cond_rhs_ptr (cond_stmt);
1494 else
1496 op0 = gimple_assign_rhs1_ptr (stmt);
1497 op1 = gimple_assign_rhs2_ptr (stmt);
1500 zero = integer_zero_node;
1501 const_iv.step = integer_zero_node;
1503 if (TREE_CODE (*op0) == SSA_NAME)
1504 iv0 = get_iv (data, *op0);
1505 if (TREE_CODE (*op1) == SSA_NAME)
1506 iv1 = get_iv (data, *op1);
1508 /* Exactly one of the compared values must be an iv, and the other one must
1509 be an invariant. */
1510 if (!iv0 || !iv1)
1511 goto end;
1513 if (integer_zerop (iv0->step))
1515 /* Control variable may be on the other side. */
1516 std::swap (op0, op1);
1517 std::swap (iv0, iv1);
1519 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1521 end:
1522 if (control_var)
1523 *control_var = op0;
1524 if (iv_var)
1525 *iv_var = iv0;
1526 if (bound)
1527 *bound = op1;
1528 if (iv_bound)
1529 *iv_bound = iv1;
1531 return ret;
1534 /* Checks whether the condition in STMT is interesting and if so,
1535 records it. */
1537 static void
1538 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1540 tree *var_p, *bound_p;
1541 struct iv *var_iv, *civ;
1543 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1545 find_interesting_uses_op (data, *var_p);
1546 find_interesting_uses_op (data, *bound_p);
1547 return;
1550 civ = XNEW (struct iv);
1551 *civ = *var_iv;
1552 record_use (data, NULL, civ, stmt, USE_COMPARE);
1555 /* Returns the outermost loop EXPR is obviously invariant in
1556 relative to the loop LOOP, i.e. if all its operands are defined
1557 outside of the returned loop. Returns NULL if EXPR is not
1558 even obviously invariant in LOOP. */
1560 struct loop *
1561 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1563 basic_block def_bb;
1564 unsigned i, len;
1566 if (is_gimple_min_invariant (expr))
1567 return current_loops->tree_root;
1569 if (TREE_CODE (expr) == SSA_NAME)
1571 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1572 if (def_bb)
1574 if (flow_bb_inside_loop_p (loop, def_bb))
1575 return NULL;
1576 return superloop_at_depth (loop,
1577 loop_depth (def_bb->loop_father) + 1);
1580 return current_loops->tree_root;
1583 if (!EXPR_P (expr))
1584 return NULL;
1586 unsigned maxdepth = 0;
1587 len = TREE_OPERAND_LENGTH (expr);
1588 for (i = 0; i < len; i++)
1590 struct loop *ivloop;
1591 if (!TREE_OPERAND (expr, i))
1592 continue;
1594 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1595 if (!ivloop)
1596 return NULL;
1597 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1600 return superloop_at_depth (loop, maxdepth);
1603 /* Returns true if expression EXPR is obviously invariant in LOOP,
1604 i.e. if all its operands are defined outside of the LOOP. LOOP
1605 should not be the function body. */
1607 bool
1608 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1610 basic_block def_bb;
1611 unsigned i, len;
1613 gcc_assert (loop_depth (loop) > 0);
1615 if (is_gimple_min_invariant (expr))
1616 return true;
1618 if (TREE_CODE (expr) == SSA_NAME)
1620 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1621 if (def_bb
1622 && flow_bb_inside_loop_p (loop, def_bb))
1623 return false;
1625 return true;
1628 if (!EXPR_P (expr))
1629 return false;
1631 len = TREE_OPERAND_LENGTH (expr);
1632 for (i = 0; i < len; i++)
1633 if (TREE_OPERAND (expr, i)
1634 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1635 return false;
1637 return true;
1640 /* Cumulates the steps of indices into DATA and replaces their values with the
1641 initial ones. Returns false when the value of the index cannot be determined.
1642 Callback for for_each_index. */
1644 struct ifs_ivopts_data
1646 struct ivopts_data *ivopts_data;
1647 gimple stmt;
1648 tree step;
1651 static bool
1652 idx_find_step (tree base, tree *idx, void *data)
1654 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1655 struct iv *iv;
1656 bool use_overflow_semantics = false;
1657 tree step, iv_base, iv_step, lbound, off;
1658 struct loop *loop = dta->ivopts_data->current_loop;
1660 /* If base is a component ref, require that the offset of the reference
1661 be invariant. */
1662 if (TREE_CODE (base) == COMPONENT_REF)
1664 off = component_ref_field_offset (base);
1665 return expr_invariant_in_loop_p (loop, off);
1668 /* If base is array, first check whether we will be able to move the
1669 reference out of the loop (in order to take its address in strength
1670 reduction). In order for this to work we need both lower bound
1671 and step to be loop invariants. */
1672 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1674 /* Moreover, for a range, the size needs to be invariant as well. */
1675 if (TREE_CODE (base) == ARRAY_RANGE_REF
1676 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1677 return false;
1679 step = array_ref_element_size (base);
1680 lbound = array_ref_low_bound (base);
1682 if (!expr_invariant_in_loop_p (loop, step)
1683 || !expr_invariant_in_loop_p (loop, lbound))
1684 return false;
1687 if (TREE_CODE (*idx) != SSA_NAME)
1688 return true;
1690 iv = get_iv (dta->ivopts_data, *idx);
1691 if (!iv)
1692 return false;
1694 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1695 *&x[0], which is not folded and does not trigger the
1696 ARRAY_REF path below. */
1697 *idx = iv->base;
1699 if (integer_zerop (iv->step))
1700 return true;
1702 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1704 step = array_ref_element_size (base);
1706 /* We only handle addresses whose step is an integer constant. */
1707 if (TREE_CODE (step) != INTEGER_CST)
1708 return false;
1710 else
1711 /* The step for pointer arithmetics already is 1 byte. */
1712 step = size_one_node;
1714 iv_base = iv->base;
1715 iv_step = iv->step;
1716 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1717 use_overflow_semantics = true;
1719 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1720 sizetype, &iv_base, &iv_step, dta->stmt,
1721 use_overflow_semantics))
1723 /* The index might wrap. */
1724 return false;
1727 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1728 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1730 return true;
1733 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1734 object is passed to it in DATA. */
1736 static bool
1737 idx_record_use (tree base, tree *idx,
1738 void *vdata)
1740 struct ivopts_data *data = (struct ivopts_data *) vdata;
1741 find_interesting_uses_op (data, *idx);
1742 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1744 find_interesting_uses_op (data, array_ref_element_size (base));
1745 find_interesting_uses_op (data, array_ref_low_bound (base));
1747 return true;
1750 /* If we can prove that TOP = cst * BOT for some constant cst,
1751 store cst to MUL and return true. Otherwise return false.
1752 The returned value is always sign-extended, regardless of the
1753 signedness of TOP and BOT. */
1755 static bool
1756 constant_multiple_of (tree top, tree bot, widest_int *mul)
1758 tree mby;
1759 enum tree_code code;
1760 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1761 widest_int res, p0, p1;
1763 STRIP_NOPS (top);
1764 STRIP_NOPS (bot);
1766 if (operand_equal_p (top, bot, 0))
1768 *mul = 1;
1769 return true;
1772 code = TREE_CODE (top);
1773 switch (code)
1775 case MULT_EXPR:
1776 mby = TREE_OPERAND (top, 1);
1777 if (TREE_CODE (mby) != INTEGER_CST)
1778 return false;
1780 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1781 return false;
1783 *mul = wi::sext (res * wi::to_widest (mby), precision);
1784 return true;
1786 case PLUS_EXPR:
1787 case MINUS_EXPR:
1788 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1789 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1790 return false;
1792 if (code == MINUS_EXPR)
1793 p1 = -p1;
1794 *mul = wi::sext (p0 + p1, precision);
1795 return true;
1797 case INTEGER_CST:
1798 if (TREE_CODE (bot) != INTEGER_CST)
1799 return false;
1801 p0 = widest_int::from (top, SIGNED);
1802 p1 = widest_int::from (bot, SIGNED);
1803 if (p1 == 0)
1804 return false;
1805 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1806 return res == 0;
1808 default:
1809 return false;
1813 /* Return true if memory reference REF with step STEP may be unaligned. */
1815 static bool
1816 may_be_unaligned_p (tree ref, tree step)
1818 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1819 thus they are not misaligned. */
1820 if (TREE_CODE (ref) == TARGET_MEM_REF)
1821 return false;
1823 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1824 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1825 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1827 unsigned HOST_WIDE_INT bitpos;
1828 unsigned int ref_align;
1829 get_object_alignment_1 (ref, &ref_align, &bitpos);
1830 if (ref_align < align
1831 || (bitpos % align) != 0
1832 || (bitpos % BITS_PER_UNIT) != 0)
1833 return true;
1835 unsigned int trailing_zeros = tree_ctz (step);
1836 if (trailing_zeros < HOST_BITS_PER_INT
1837 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1838 return true;
1840 return false;
1843 /* Return true if EXPR may be non-addressable. */
1845 bool
1846 may_be_nonaddressable_p (tree expr)
1848 switch (TREE_CODE (expr))
1850 case TARGET_MEM_REF:
1851 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1852 target, thus they are always addressable. */
1853 return false;
1855 case COMPONENT_REF:
1856 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1857 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1859 case VIEW_CONVERT_EXPR:
1860 /* This kind of view-conversions may wrap non-addressable objects
1861 and make them look addressable. After some processing the
1862 non-addressability may be uncovered again, causing ADDR_EXPRs
1863 of inappropriate objects to be built. */
1864 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1865 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1866 return true;
1868 /* ... fall through ... */
1870 case ARRAY_REF:
1871 case ARRAY_RANGE_REF:
1872 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1874 CASE_CONVERT:
1875 return true;
1877 default:
1878 break;
1881 return false;
1884 static tree
1885 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1887 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1888 If there is an existing use which has same stripped iv base and step,
1889 this function records this one as a sub use to that; otherwise records
1890 it as a normal one. */
1892 static struct iv_use *
1893 record_group_use (struct ivopts_data *data, tree *use_p,
1894 struct iv *iv, gimple stmt, enum use_type use_type)
1896 unsigned int i;
1897 struct iv_use *use;
1898 tree addr_base;
1899 unsigned HOST_WIDE_INT addr_offset;
1901 /* Only support sub use for address type uses, that is, with base
1902 object. */
1903 if (!iv->base_object)
1904 return record_use (data, use_p, iv, stmt, use_type);
1906 addr_base = strip_offset (iv->base, &addr_offset);
1907 for (i = 0; i < n_iv_uses (data); i++)
1909 use = iv_use (data, i);
1910 if (use->type != USE_ADDRESS || !use->iv->base_object)
1911 continue;
1913 /* Check if it has the same stripped base and step. */
1914 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1915 && operand_equal_p (iv->step, use->iv->step, 0)
1916 && operand_equal_p (addr_base, use->addr_base, 0))
1917 break;
1920 if (i == n_iv_uses (data))
1921 return record_use (data, use_p, iv, stmt,
1922 use_type, addr_base, addr_offset);
1923 else
1924 return record_sub_use (data, use_p, iv, stmt,
1925 use_type, addr_base, addr_offset, i);
1928 /* Finds addresses in *OP_P inside STMT. */
1930 static void
1931 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1933 tree base = *op_p, step = size_zero_node;
1934 struct iv *civ;
1935 struct ifs_ivopts_data ifs_ivopts_data;
1937 /* Do not play with volatile memory references. A bit too conservative,
1938 perhaps, but safe. */
1939 if (gimple_has_volatile_ops (stmt))
1940 goto fail;
1942 /* Ignore bitfields for now. Not really something terribly complicated
1943 to handle. TODO. */
1944 if (TREE_CODE (base) == BIT_FIELD_REF)
1945 goto fail;
1947 base = unshare_expr (base);
1949 if (TREE_CODE (base) == TARGET_MEM_REF)
1951 tree type = build_pointer_type (TREE_TYPE (base));
1952 tree astep;
1954 if (TMR_BASE (base)
1955 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1957 civ = get_iv (data, TMR_BASE (base));
1958 if (!civ)
1959 goto fail;
1961 TMR_BASE (base) = civ->base;
1962 step = civ->step;
1964 if (TMR_INDEX2 (base)
1965 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1967 civ = get_iv (data, TMR_INDEX2 (base));
1968 if (!civ)
1969 goto fail;
1971 TMR_INDEX2 (base) = civ->base;
1972 step = civ->step;
1974 if (TMR_INDEX (base)
1975 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1977 civ = get_iv (data, TMR_INDEX (base));
1978 if (!civ)
1979 goto fail;
1981 TMR_INDEX (base) = civ->base;
1982 astep = civ->step;
1984 if (astep)
1986 if (TMR_STEP (base))
1987 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1989 step = fold_build2 (PLUS_EXPR, type, step, astep);
1993 if (integer_zerop (step))
1994 goto fail;
1995 base = tree_mem_ref_addr (type, base);
1997 else
1999 ifs_ivopts_data.ivopts_data = data;
2000 ifs_ivopts_data.stmt = stmt;
2001 ifs_ivopts_data.step = size_zero_node;
2002 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2003 || integer_zerop (ifs_ivopts_data.step))
2004 goto fail;
2005 step = ifs_ivopts_data.step;
2007 /* Check that the base expression is addressable. This needs
2008 to be done after substituting bases of IVs into it. */
2009 if (may_be_nonaddressable_p (base))
2010 goto fail;
2012 /* Moreover, on strict alignment platforms, check that it is
2013 sufficiently aligned. */
2014 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2015 goto fail;
2017 base = build_fold_addr_expr (base);
2019 /* Substituting bases of IVs into the base expression might
2020 have caused folding opportunities. */
2021 if (TREE_CODE (base) == ADDR_EXPR)
2023 tree *ref = &TREE_OPERAND (base, 0);
2024 while (handled_component_p (*ref))
2025 ref = &TREE_OPERAND (*ref, 0);
2026 if (TREE_CODE (*ref) == MEM_REF)
2028 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2029 TREE_OPERAND (*ref, 0),
2030 TREE_OPERAND (*ref, 1));
2031 if (tem)
2032 *ref = tem;
2037 civ = alloc_iv (base, step);
2038 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2039 return;
2041 fail:
2042 for_each_index (op_p, idx_record_use, data);
2045 /* Finds and records invariants used in STMT. */
2047 static void
2048 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2050 ssa_op_iter iter;
2051 use_operand_p use_p;
2052 tree op;
2054 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2056 op = USE_FROM_PTR (use_p);
2057 record_invariant (data, op, false);
2061 /* Finds interesting uses of induction variables in the statement STMT. */
2063 static void
2064 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2066 struct iv *iv;
2067 tree op, *lhs, *rhs;
2068 ssa_op_iter iter;
2069 use_operand_p use_p;
2070 enum tree_code code;
2072 find_invariants_stmt (data, stmt);
2074 if (gimple_code (stmt) == GIMPLE_COND)
2076 find_interesting_uses_cond (data, stmt);
2077 return;
2080 if (is_gimple_assign (stmt))
2082 lhs = gimple_assign_lhs_ptr (stmt);
2083 rhs = gimple_assign_rhs1_ptr (stmt);
2085 if (TREE_CODE (*lhs) == SSA_NAME)
2087 /* If the statement defines an induction variable, the uses are not
2088 interesting by themselves. */
2090 iv = get_iv (data, *lhs);
2092 if (iv && !integer_zerop (iv->step))
2093 return;
2096 code = gimple_assign_rhs_code (stmt);
2097 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2098 && (REFERENCE_CLASS_P (*rhs)
2099 || is_gimple_val (*rhs)))
2101 if (REFERENCE_CLASS_P (*rhs))
2102 find_interesting_uses_address (data, stmt, rhs);
2103 else
2104 find_interesting_uses_op (data, *rhs);
2106 if (REFERENCE_CLASS_P (*lhs))
2107 find_interesting_uses_address (data, stmt, lhs);
2108 return;
2110 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2112 find_interesting_uses_cond (data, stmt);
2113 return;
2116 /* TODO -- we should also handle address uses of type
2118 memory = call (whatever);
2122 call (memory). */
2125 if (gimple_code (stmt) == GIMPLE_PHI
2126 && gimple_bb (stmt) == data->current_loop->header)
2128 iv = get_iv (data, PHI_RESULT (stmt));
2130 if (iv && !integer_zerop (iv->step))
2131 return;
2134 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2136 op = USE_FROM_PTR (use_p);
2138 if (TREE_CODE (op) != SSA_NAME)
2139 continue;
2141 iv = get_iv (data, op);
2142 if (!iv)
2143 continue;
2145 find_interesting_uses_op (data, op);
2149 /* Finds interesting uses of induction variables outside of loops
2150 on loop exit edge EXIT. */
2152 static void
2153 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2155 gphi *phi;
2156 gphi_iterator psi;
2157 tree def;
2159 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2161 phi = psi.phi ();
2162 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2163 if (!virtual_operand_p (def))
2164 find_interesting_uses_op (data, def);
2168 /* Finds uses of the induction variables that are interesting. */
2170 static void
2171 find_interesting_uses (struct ivopts_data *data)
2173 basic_block bb;
2174 gimple_stmt_iterator bsi;
2175 basic_block *body = get_loop_body (data->current_loop);
2176 unsigned i;
2177 struct version_info *info;
2178 edge e;
2180 if (dump_file && (dump_flags & TDF_DETAILS))
2181 fprintf (dump_file, "Uses:\n\n");
2183 for (i = 0; i < data->current_loop->num_nodes; i++)
2185 edge_iterator ei;
2186 bb = body[i];
2188 FOR_EACH_EDGE (e, ei, bb->succs)
2189 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2190 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2191 find_interesting_uses_outside (data, e);
2193 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2194 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2195 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2196 if (!is_gimple_debug (gsi_stmt (bsi)))
2197 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2200 if (dump_file && (dump_flags & TDF_DETAILS))
2202 bitmap_iterator bi;
2204 fprintf (dump_file, "\n");
2206 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2208 info = ver_info (data, i);
2209 if (info->inv_id)
2211 fprintf (dump_file, " ");
2212 print_generic_expr (dump_file, info->name, TDF_SLIM);
2213 fprintf (dump_file, " is invariant (%d)%s\n",
2214 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2218 fprintf (dump_file, "\n");
2221 free (body);
2224 /* Compute maximum offset of [base + offset] addressing mode
2225 for memory reference represented by USE. */
2227 static HOST_WIDE_INT
2228 compute_max_addr_offset (struct iv_use *use)
2230 int width;
2231 rtx reg, addr;
2232 HOST_WIDE_INT i, off;
2233 unsigned list_index, num;
2234 addr_space_t as;
2235 machine_mode mem_mode, addr_mode;
2236 static vec<HOST_WIDE_INT> max_offset_list;
2238 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2239 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2241 num = max_offset_list.length ();
2242 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2243 if (list_index >= num)
2245 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2246 for (; num < max_offset_list.length (); num++)
2247 max_offset_list[num] = -1;
2250 off = max_offset_list[list_index];
2251 if (off != -1)
2252 return off;
2254 addr_mode = targetm.addr_space.address_mode (as);
2255 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2256 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2258 width = GET_MODE_BITSIZE (addr_mode) - 1;
2259 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2260 width = HOST_BITS_PER_WIDE_INT - 1;
2262 for (i = width; i > 0; i--)
2264 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2265 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2266 if (memory_address_addr_space_p (mem_mode, addr, as))
2267 break;
2269 /* For some strict-alignment targets, the offset must be naturally
2270 aligned. Try an aligned offset if mem_mode is not QImode. */
2271 off = ((unsigned HOST_WIDE_INT) 1 << i);
2272 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2274 off -= GET_MODE_SIZE (mem_mode);
2275 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2276 if (memory_address_addr_space_p (mem_mode, addr, as))
2277 break;
2280 if (i == 0)
2281 off = 0;
2283 max_offset_list[list_index] = off;
2284 return off;
2287 /* Check if all small groups should be split. Return true if and
2288 only if:
2290 1) At least one groups contain two uses with different offsets.
2291 2) No group contains more than two uses with different offsets.
2293 Return false otherwise. We want to split such groups because:
2295 1) Small groups don't have much benefit and may interfer with
2296 general candidate selection.
2297 2) Size for problem with only small groups is usually small and
2298 general algorithm can handle it well.
2300 TODO -- Above claim may not hold when auto increment is supported. */
2302 static bool
2303 split_all_small_groups (struct ivopts_data *data)
2305 bool split_p = false;
2306 unsigned int i, n, distinct;
2307 struct iv_use *pre, *use;
2309 n = n_iv_uses (data);
2310 for (i = 0; i < n; i++)
2312 use = iv_use (data, i);
2313 if (!use->next)
2314 continue;
2316 distinct = 1;
2317 gcc_assert (use->type == USE_ADDRESS);
2318 for (pre = use, use = use->next; use; pre = use, use = use->next)
2320 if (pre->addr_offset != use->addr_offset)
2321 distinct++;
2323 if (distinct > 2)
2324 return false;
2326 if (distinct == 2)
2327 split_p = true;
2330 return split_p;
2333 /* For each group of address type uses, this function further groups
2334 these uses according to the maximum offset supported by target's
2335 [base + offset] addressing mode. */
2337 static void
2338 group_address_uses (struct ivopts_data *data)
2340 HOST_WIDE_INT max_offset = -1;
2341 unsigned int i, n, sub_id;
2342 struct iv_use *pre, *use;
2343 unsigned HOST_WIDE_INT addr_offset_first;
2345 /* Reset max offset to split all small groups. */
2346 if (split_all_small_groups (data))
2347 max_offset = 0;
2349 n = n_iv_uses (data);
2350 for (i = 0; i < n; i++)
2352 use = iv_use (data, i);
2353 if (!use->next)
2354 continue;
2356 gcc_assert (use->type == USE_ADDRESS);
2357 if (max_offset != 0)
2358 max_offset = compute_max_addr_offset (use);
2360 while (use)
2362 sub_id = 0;
2363 addr_offset_first = use->addr_offset;
2364 /* Only uses with offset that can fit in offset part against
2365 the first use can be grouped together. */
2366 for (pre = use, use = use->next;
2367 use && (use->addr_offset - addr_offset_first
2368 <= (unsigned HOST_WIDE_INT) max_offset);
2369 pre = use, use = use->next)
2371 use->id = pre->id;
2372 use->sub_id = ++sub_id;
2375 /* Break the list and create new group. */
2376 if (use)
2378 pre->next = NULL;
2379 use->id = n_iv_uses (data);
2380 use->related_cands = BITMAP_ALLOC (NULL);
2381 data->iv_uses.safe_push (use);
2386 if (dump_file && (dump_flags & TDF_DETAILS))
2387 dump_uses (dump_file, data);
2390 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2391 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2392 we are at the top-level of the processed address. */
2394 static tree
2395 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2396 HOST_WIDE_INT *offset)
2398 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2399 enum tree_code code;
2400 tree type, orig_type = TREE_TYPE (expr);
2401 HOST_WIDE_INT off0, off1, st;
2402 tree orig_expr = expr;
2404 STRIP_NOPS (expr);
2406 type = TREE_TYPE (expr);
2407 code = TREE_CODE (expr);
2408 *offset = 0;
2410 switch (code)
2412 case INTEGER_CST:
2413 if (!cst_and_fits_in_hwi (expr)
2414 || integer_zerop (expr))
2415 return orig_expr;
2417 *offset = int_cst_value (expr);
2418 return build_int_cst (orig_type, 0);
2420 case POINTER_PLUS_EXPR:
2421 case PLUS_EXPR:
2422 case MINUS_EXPR:
2423 op0 = TREE_OPERAND (expr, 0);
2424 op1 = TREE_OPERAND (expr, 1);
2426 op0 = strip_offset_1 (op0, false, false, &off0);
2427 op1 = strip_offset_1 (op1, false, false, &off1);
2429 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2430 if (op0 == TREE_OPERAND (expr, 0)
2431 && op1 == TREE_OPERAND (expr, 1))
2432 return orig_expr;
2434 if (integer_zerop (op1))
2435 expr = op0;
2436 else if (integer_zerop (op0))
2438 if (code == MINUS_EXPR)
2439 expr = fold_build1 (NEGATE_EXPR, type, op1);
2440 else
2441 expr = op1;
2443 else
2444 expr = fold_build2 (code, type, op0, op1);
2446 return fold_convert (orig_type, expr);
2448 case MULT_EXPR:
2449 op1 = TREE_OPERAND (expr, 1);
2450 if (!cst_and_fits_in_hwi (op1))
2451 return orig_expr;
2453 op0 = TREE_OPERAND (expr, 0);
2454 op0 = strip_offset_1 (op0, false, false, &off0);
2455 if (op0 == TREE_OPERAND (expr, 0))
2456 return orig_expr;
2458 *offset = off0 * int_cst_value (op1);
2459 if (integer_zerop (op0))
2460 expr = op0;
2461 else
2462 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2464 return fold_convert (orig_type, expr);
2466 case ARRAY_REF:
2467 case ARRAY_RANGE_REF:
2468 if (!inside_addr)
2469 return orig_expr;
2471 step = array_ref_element_size (expr);
2472 if (!cst_and_fits_in_hwi (step))
2473 break;
2475 st = int_cst_value (step);
2476 op1 = TREE_OPERAND (expr, 1);
2477 op1 = strip_offset_1 (op1, false, false, &off1);
2478 *offset = off1 * st;
2480 if (top_compref
2481 && integer_zerop (op1))
2483 /* Strip the component reference completely. */
2484 op0 = TREE_OPERAND (expr, 0);
2485 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2486 *offset += off0;
2487 return op0;
2489 break;
2491 case COMPONENT_REF:
2493 tree field;
2495 if (!inside_addr)
2496 return orig_expr;
2498 tmp = component_ref_field_offset (expr);
2499 field = TREE_OPERAND (expr, 1);
2500 if (top_compref
2501 && cst_and_fits_in_hwi (tmp)
2502 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2504 HOST_WIDE_INT boffset, abs_off;
2506 /* Strip the component reference completely. */
2507 op0 = TREE_OPERAND (expr, 0);
2508 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2509 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2510 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2511 if (boffset < 0)
2512 abs_off = -abs_off;
2514 *offset = off0 + int_cst_value (tmp) + abs_off;
2515 return op0;
2518 break;
2520 case ADDR_EXPR:
2521 op0 = TREE_OPERAND (expr, 0);
2522 op0 = strip_offset_1 (op0, true, true, &off0);
2523 *offset += off0;
2525 if (op0 == TREE_OPERAND (expr, 0))
2526 return orig_expr;
2528 expr = build_fold_addr_expr (op0);
2529 return fold_convert (orig_type, expr);
2531 case MEM_REF:
2532 /* ??? Offset operand? */
2533 inside_addr = false;
2534 break;
2536 default:
2537 return orig_expr;
2540 /* Default handling of expressions for that we want to recurse into
2541 the first operand. */
2542 op0 = TREE_OPERAND (expr, 0);
2543 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2544 *offset += off0;
2546 if (op0 == TREE_OPERAND (expr, 0)
2547 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2548 return orig_expr;
2550 expr = copy_node (expr);
2551 TREE_OPERAND (expr, 0) = op0;
2552 if (op1)
2553 TREE_OPERAND (expr, 1) = op1;
2555 /* Inside address, we might strip the top level component references,
2556 thus changing type of the expression. Handling of ADDR_EXPR
2557 will fix that. */
2558 expr = fold_convert (orig_type, expr);
2560 return expr;
2563 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2565 static tree
2566 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2568 HOST_WIDE_INT off;
2569 tree core = strip_offset_1 (expr, false, false, &off);
2570 *offset = off;
2571 return core;
2574 /* Returns variant of TYPE that can be used as base for different uses.
2575 We return unsigned type with the same precision, which avoids problems
2576 with overflows. */
2578 static tree
2579 generic_type_for (tree type)
2581 if (POINTER_TYPE_P (type))
2582 return unsigned_type_for (type);
2584 if (TYPE_UNSIGNED (type))
2585 return type;
2587 return unsigned_type_for (type);
2590 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2591 the bitmap to that we should store it. */
2593 static struct ivopts_data *fd_ivopts_data;
2594 static tree
2595 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2597 bitmap *depends_on = (bitmap *) data;
2598 struct version_info *info;
2600 if (TREE_CODE (*expr_p) != SSA_NAME)
2601 return NULL_TREE;
2602 info = name_info (fd_ivopts_data, *expr_p);
2604 if (!info->inv_id || info->has_nonlin_use)
2605 return NULL_TREE;
2607 if (!*depends_on)
2608 *depends_on = BITMAP_ALLOC (NULL);
2609 bitmap_set_bit (*depends_on, info->inv_id);
2611 return NULL_TREE;
2614 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2615 position to POS. If USE is not NULL, the candidate is set as related to
2616 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2617 replacement of the final value of the iv by a direct computation. */
2619 static struct iv_cand *
2620 add_candidate_1 (struct ivopts_data *data,
2621 tree base, tree step, bool important, enum iv_position pos,
2622 struct iv_use *use, gimple incremented_at)
2624 unsigned i;
2625 struct iv_cand *cand = NULL;
2626 tree type, orig_type;
2628 /* For non-original variables, make sure their values are computed in a type
2629 that does not invoke undefined behavior on overflows (since in general,
2630 we cannot prove that these induction variables are non-wrapping). */
2631 if (pos != IP_ORIGINAL)
2633 orig_type = TREE_TYPE (base);
2634 type = generic_type_for (orig_type);
2635 if (type != orig_type)
2637 base = fold_convert (type, base);
2638 step = fold_convert (type, step);
2642 for (i = 0; i < n_iv_cands (data); i++)
2644 cand = iv_cand (data, i);
2646 if (cand->pos != pos)
2647 continue;
2649 if (cand->incremented_at != incremented_at
2650 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2651 && cand->ainc_use != use))
2652 continue;
2654 if (!cand->iv)
2656 if (!base && !step)
2657 break;
2659 continue;
2662 if (!base && !step)
2663 continue;
2665 if (operand_equal_p (base, cand->iv->base, 0)
2666 && operand_equal_p (step, cand->iv->step, 0)
2667 && (TYPE_PRECISION (TREE_TYPE (base))
2668 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2669 break;
2672 if (i == n_iv_cands (data))
2674 cand = XCNEW (struct iv_cand);
2675 cand->id = i;
2677 if (!base && !step)
2678 cand->iv = NULL;
2679 else
2680 cand->iv = alloc_iv (base, step);
2682 cand->pos = pos;
2683 if (pos != IP_ORIGINAL && cand->iv)
2685 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2686 cand->var_after = cand->var_before;
2688 cand->important = important;
2689 cand->incremented_at = incremented_at;
2690 data->iv_candidates.safe_push (cand);
2692 if (step
2693 && TREE_CODE (step) != INTEGER_CST)
2695 fd_ivopts_data = data;
2696 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2699 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2700 cand->ainc_use = use;
2701 else
2702 cand->ainc_use = NULL;
2704 if (dump_file && (dump_flags & TDF_DETAILS))
2705 dump_cand (dump_file, cand);
2708 if (important && !cand->important)
2710 cand->important = true;
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2712 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2715 if (use)
2717 bitmap_set_bit (use->related_cands, i);
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2719 fprintf (dump_file, "Candidate %d is related to use %d\n",
2720 cand->id, use->id);
2723 return cand;
2726 /* Returns true if incrementing the induction variable at the end of the LOOP
2727 is allowed.
2729 The purpose is to avoid splitting latch edge with a biv increment, thus
2730 creating a jump, possibly confusing other optimization passes and leaving
2731 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2732 is not available (so we do not have a better alternative), or if the latch
2733 edge is already nonempty. */
2735 static bool
2736 allow_ip_end_pos_p (struct loop *loop)
2738 if (!ip_normal_pos (loop))
2739 return true;
2741 if (!empty_block_p (ip_end_pos (loop)))
2742 return true;
2744 return false;
2747 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2748 Important field is set to IMPORTANT. */
2750 static void
2751 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2752 bool important, struct iv_use *use)
2754 basic_block use_bb = gimple_bb (use->stmt);
2755 machine_mode mem_mode;
2756 unsigned HOST_WIDE_INT cstepi;
2758 /* If we insert the increment in any position other than the standard
2759 ones, we must ensure that it is incremented once per iteration.
2760 It must not be in an inner nested loop, or one side of an if
2761 statement. */
2762 if (use_bb->loop_father != data->current_loop
2763 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2764 || stmt_could_throw_p (use->stmt)
2765 || !cst_and_fits_in_hwi (step))
2766 return;
2768 cstepi = int_cst_value (step);
2770 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2771 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2772 || USE_STORE_PRE_INCREMENT (mem_mode))
2773 && GET_MODE_SIZE (mem_mode) == cstepi)
2774 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2775 || USE_STORE_PRE_DECREMENT (mem_mode))
2776 && GET_MODE_SIZE (mem_mode) == -cstepi))
2778 enum tree_code code = MINUS_EXPR;
2779 tree new_base;
2780 tree new_step = step;
2782 if (POINTER_TYPE_P (TREE_TYPE (base)))
2784 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2785 code = POINTER_PLUS_EXPR;
2787 else
2788 new_step = fold_convert (TREE_TYPE (base), new_step);
2789 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2790 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2791 use->stmt);
2793 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2794 || USE_STORE_POST_INCREMENT (mem_mode))
2795 && GET_MODE_SIZE (mem_mode) == cstepi)
2796 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2797 || USE_STORE_POST_DECREMENT (mem_mode))
2798 && GET_MODE_SIZE (mem_mode) == -cstepi))
2800 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2801 use->stmt);
2805 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2806 position to POS. If USE is not NULL, the candidate is set as related to
2807 it. The candidate computation is scheduled on all available positions. */
2809 static void
2810 add_candidate (struct ivopts_data *data,
2811 tree base, tree step, bool important, struct iv_use *use)
2813 gcc_assert (use == NULL || use->sub_id == 0);
2815 if (ip_normal_pos (data->current_loop))
2816 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2817 if (ip_end_pos (data->current_loop)
2818 && allow_ip_end_pos_p (data->current_loop))
2819 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2821 if (use != NULL && use->type == USE_ADDRESS)
2822 add_autoinc_candidates (data, base, step, important, use);
2825 /* Adds standard iv candidates. */
2827 static void
2828 add_standard_iv_candidates (struct ivopts_data *data)
2830 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2832 /* The same for a double-integer type if it is still fast enough. */
2833 if (TYPE_PRECISION
2834 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2835 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2836 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2837 build_int_cst (long_integer_type_node, 1), true, NULL);
2839 /* The same for a double-integer type if it is still fast enough. */
2840 if (TYPE_PRECISION
2841 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2842 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2843 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2844 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2848 /* Adds candidates bases on the old induction variable IV. */
2850 static void
2851 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2853 gimple phi;
2854 tree def;
2855 struct iv_cand *cand;
2857 add_candidate (data, iv->base, iv->step, true, NULL);
2859 /* The same, but with initial value zero. */
2860 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2861 add_candidate (data, size_int (0), iv->step, true, NULL);
2862 else
2863 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2864 iv->step, true, NULL);
2866 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2867 if (gimple_code (phi) == GIMPLE_PHI)
2869 /* Additionally record the possibility of leaving the original iv
2870 untouched. */
2871 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2872 /* Don't add candidate if it's from another PHI node because
2873 it's an affine iv appearing in the form of PEELED_CHREC. */
2874 phi = SSA_NAME_DEF_STMT (def);
2875 if (gimple_code (phi) != GIMPLE_PHI)
2877 cand = add_candidate_1 (data,
2878 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2879 SSA_NAME_DEF_STMT (def));
2880 cand->var_before = iv->ssa_name;
2881 cand->var_after = def;
2883 else
2884 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2888 /* Adds candidates based on the old induction variables. */
2890 static void
2891 add_old_ivs_candidates (struct ivopts_data *data)
2893 unsigned i;
2894 struct iv *iv;
2895 bitmap_iterator bi;
2897 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2899 iv = ver_info (data, i)->iv;
2900 if (iv && iv->biv_p && !integer_zerop (iv->step))
2901 add_old_iv_candidates (data, iv);
2905 /* Adds candidates based on the value of the induction variable IV and USE. */
2907 static void
2908 add_iv_value_candidates (struct ivopts_data *data,
2909 struct iv *iv, struct iv_use *use)
2911 unsigned HOST_WIDE_INT offset;
2912 tree base;
2913 tree basetype;
2915 add_candidate (data, iv->base, iv->step, false, use);
2917 /* The same, but with initial value zero. Make such variable important,
2918 since it is generic enough so that possibly many uses may be based
2919 on it. */
2920 basetype = TREE_TYPE (iv->base);
2921 if (POINTER_TYPE_P (basetype))
2922 basetype = sizetype;
2923 add_candidate (data, build_int_cst (basetype, 0),
2924 iv->step, true, use);
2926 /* Third, try removing the constant offset. Make sure to even
2927 add a candidate for &a[0] vs. (T *)&a. */
2928 base = strip_offset (iv->base, &offset);
2929 if (offset
2930 || base != iv->base)
2931 add_candidate (data, base, iv->step, false, use);
2934 /* Adds candidates based on the uses. */
2936 static void
2937 add_derived_ivs_candidates (struct ivopts_data *data)
2939 unsigned i;
2941 for (i = 0; i < n_iv_uses (data); i++)
2943 struct iv_use *use = iv_use (data, i);
2945 if (!use)
2946 continue;
2948 switch (use->type)
2950 case USE_NONLINEAR_EXPR:
2951 case USE_COMPARE:
2952 case USE_ADDRESS:
2953 /* Just add the ivs based on the value of the iv used here. */
2954 add_iv_value_candidates (data, use->iv, use);
2955 break;
2957 default:
2958 gcc_unreachable ();
2963 /* Record important candidates and add them to related_cands bitmaps
2964 if needed. */
2966 static void
2967 record_important_candidates (struct ivopts_data *data)
2969 unsigned i;
2970 struct iv_use *use;
2972 for (i = 0; i < n_iv_cands (data); i++)
2974 struct iv_cand *cand = iv_cand (data, i);
2976 if (cand->important)
2977 bitmap_set_bit (data->important_candidates, i);
2980 data->consider_all_candidates = (n_iv_cands (data)
2981 <= CONSIDER_ALL_CANDIDATES_BOUND);
2983 if (data->consider_all_candidates)
2985 /* We will not need "related_cands" bitmaps in this case,
2986 so release them to decrease peak memory consumption. */
2987 for (i = 0; i < n_iv_uses (data); i++)
2989 use = iv_use (data, i);
2990 BITMAP_FREE (use->related_cands);
2993 else
2995 /* Add important candidates to the related_cands bitmaps. */
2996 for (i = 0; i < n_iv_uses (data); i++)
2997 bitmap_ior_into (iv_use (data, i)->related_cands,
2998 data->important_candidates);
3002 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3003 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3004 we allocate a simple list to every use. */
3006 static void
3007 alloc_use_cost_map (struct ivopts_data *data)
3009 unsigned i, size, s;
3011 for (i = 0; i < n_iv_uses (data); i++)
3013 struct iv_use *use = iv_use (data, i);
3015 if (data->consider_all_candidates)
3016 size = n_iv_cands (data);
3017 else
3019 s = bitmap_count_bits (use->related_cands);
3021 /* Round up to the power of two, so that moduling by it is fast. */
3022 size = s ? (1 << ceil_log2 (s)) : 1;
3025 use->n_map_members = size;
3026 use->cost_map = XCNEWVEC (struct cost_pair, size);
3030 /* Returns description of computation cost of expression whose runtime
3031 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3033 static comp_cost
3034 new_cost (unsigned runtime, unsigned complexity)
3036 comp_cost cost;
3038 cost.cost = runtime;
3039 cost.complexity = complexity;
3041 return cost;
3044 /* Returns true if COST is infinite. */
3046 static bool
3047 infinite_cost_p (comp_cost cost)
3049 return cost.cost == INFTY;
3052 /* Adds costs COST1 and COST2. */
3054 static comp_cost
3055 add_costs (comp_cost cost1, comp_cost cost2)
3057 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3058 return infinite_cost;
3060 cost1.cost += cost2.cost;
3061 cost1.complexity += cost2.complexity;
3063 return cost1;
3065 /* Subtracts costs COST1 and COST2. */
3067 static comp_cost
3068 sub_costs (comp_cost cost1, comp_cost cost2)
3070 cost1.cost -= cost2.cost;
3071 cost1.complexity -= cost2.complexity;
3073 return cost1;
3076 /* Returns a negative number if COST1 < COST2, a positive number if
3077 COST1 > COST2, and 0 if COST1 = COST2. */
3079 static int
3080 compare_costs (comp_cost cost1, comp_cost cost2)
3082 if (cost1.cost == cost2.cost)
3083 return cost1.complexity - cost2.complexity;
3085 return cost1.cost - cost2.cost;
3088 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3089 on invariants DEPENDS_ON and that the value used in expressing it
3090 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3092 static void
3093 set_use_iv_cost (struct ivopts_data *data,
3094 struct iv_use *use, struct iv_cand *cand,
3095 comp_cost cost, bitmap depends_on, tree value,
3096 enum tree_code comp, int inv_expr_id)
3098 unsigned i, s;
3100 if (infinite_cost_p (cost))
3102 BITMAP_FREE (depends_on);
3103 return;
3106 if (data->consider_all_candidates)
3108 use->cost_map[cand->id].cand = cand;
3109 use->cost_map[cand->id].cost = cost;
3110 use->cost_map[cand->id].depends_on = depends_on;
3111 use->cost_map[cand->id].value = value;
3112 use->cost_map[cand->id].comp = comp;
3113 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3114 return;
3117 /* n_map_members is a power of two, so this computes modulo. */
3118 s = cand->id & (use->n_map_members - 1);
3119 for (i = s; i < use->n_map_members; i++)
3120 if (!use->cost_map[i].cand)
3121 goto found;
3122 for (i = 0; i < s; i++)
3123 if (!use->cost_map[i].cand)
3124 goto found;
3126 gcc_unreachable ();
3128 found:
3129 use->cost_map[i].cand = cand;
3130 use->cost_map[i].cost = cost;
3131 use->cost_map[i].depends_on = depends_on;
3132 use->cost_map[i].value = value;
3133 use->cost_map[i].comp = comp;
3134 use->cost_map[i].inv_expr_id = inv_expr_id;
3137 /* Gets cost of (USE, CANDIDATE) pair. */
3139 static struct cost_pair *
3140 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3141 struct iv_cand *cand)
3143 unsigned i, s;
3144 struct cost_pair *ret;
3146 if (!cand)
3147 return NULL;
3149 if (data->consider_all_candidates)
3151 ret = use->cost_map + cand->id;
3152 if (!ret->cand)
3153 return NULL;
3155 return ret;
3158 /* n_map_members is a power of two, so this computes modulo. */
3159 s = cand->id & (use->n_map_members - 1);
3160 for (i = s; i < use->n_map_members; i++)
3161 if (use->cost_map[i].cand == cand)
3162 return use->cost_map + i;
3163 else if (use->cost_map[i].cand == NULL)
3164 return NULL;
3165 for (i = 0; i < s; i++)
3166 if (use->cost_map[i].cand == cand)
3167 return use->cost_map + i;
3168 else if (use->cost_map[i].cand == NULL)
3169 return NULL;
3171 return NULL;
3174 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3175 static rtx
3176 produce_memory_decl_rtl (tree obj, int *regno)
3178 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3179 machine_mode address_mode = targetm.addr_space.address_mode (as);
3180 rtx x;
3182 gcc_assert (obj);
3183 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3185 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3186 x = gen_rtx_SYMBOL_REF (address_mode, name);
3187 SET_SYMBOL_REF_DECL (x, obj);
3188 x = gen_rtx_MEM (DECL_MODE (obj), x);
3189 set_mem_addr_space (x, as);
3190 targetm.encode_section_info (obj, x, true);
3192 else
3194 x = gen_raw_REG (address_mode, (*regno)++);
3195 x = gen_rtx_MEM (DECL_MODE (obj), x);
3196 set_mem_addr_space (x, as);
3199 return x;
3202 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3203 walk_tree. DATA contains the actual fake register number. */
3205 static tree
3206 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3208 tree obj = NULL_TREE;
3209 rtx x = NULL_RTX;
3210 int *regno = (int *) data;
3212 switch (TREE_CODE (*expr_p))
3214 case ADDR_EXPR:
3215 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3216 handled_component_p (*expr_p);
3217 expr_p = &TREE_OPERAND (*expr_p, 0))
3218 continue;
3219 obj = *expr_p;
3220 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3221 x = produce_memory_decl_rtl (obj, regno);
3222 break;
3224 case SSA_NAME:
3225 *ws = 0;
3226 obj = SSA_NAME_VAR (*expr_p);
3227 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3228 if (!obj)
3229 return NULL_TREE;
3230 if (!DECL_RTL_SET_P (obj))
3231 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3232 break;
3234 case VAR_DECL:
3235 case PARM_DECL:
3236 case RESULT_DECL:
3237 *ws = 0;
3238 obj = *expr_p;
3240 if (DECL_RTL_SET_P (obj))
3241 break;
3243 if (DECL_MODE (obj) == BLKmode)
3244 x = produce_memory_decl_rtl (obj, regno);
3245 else
3246 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3248 break;
3250 default:
3251 break;
3254 if (x)
3256 decl_rtl_to_reset.safe_push (obj);
3257 SET_DECL_RTL (obj, x);
3260 return NULL_TREE;
3263 /* Determines cost of the computation of EXPR. */
3265 static unsigned
3266 computation_cost (tree expr, bool speed)
3268 rtx_insn *seq;
3269 rtx rslt;
3270 tree type = TREE_TYPE (expr);
3271 unsigned cost;
3272 /* Avoid using hard regs in ways which may be unsupported. */
3273 int regno = LAST_VIRTUAL_REGISTER + 1;
3274 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3275 enum node_frequency real_frequency = node->frequency;
3277 node->frequency = NODE_FREQUENCY_NORMAL;
3278 crtl->maybe_hot_insn_p = speed;
3279 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3280 start_sequence ();
3281 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3282 seq = get_insns ();
3283 end_sequence ();
3284 default_rtl_profile ();
3285 node->frequency = real_frequency;
3287 cost = seq_cost (seq, speed);
3288 if (MEM_P (rslt))
3289 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3290 TYPE_ADDR_SPACE (type), speed);
3291 else if (!REG_P (rslt))
3292 cost += set_src_cost (rslt, speed);
3294 return cost;
3297 /* Returns variable containing the value of candidate CAND at statement AT. */
3299 static tree
3300 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3302 if (stmt_after_increment (loop, cand, stmt))
3303 return cand->var_after;
3304 else
3305 return cand->var_before;
3308 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3309 same precision that is at least as wide as the precision of TYPE, stores
3310 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3311 type of A and B. */
3313 static tree
3314 determine_common_wider_type (tree *a, tree *b)
3316 tree wider_type = NULL;
3317 tree suba, subb;
3318 tree atype = TREE_TYPE (*a);
3320 if (CONVERT_EXPR_P (*a))
3322 suba = TREE_OPERAND (*a, 0);
3323 wider_type = TREE_TYPE (suba);
3324 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3325 return atype;
3327 else
3328 return atype;
3330 if (CONVERT_EXPR_P (*b))
3332 subb = TREE_OPERAND (*b, 0);
3333 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3334 return atype;
3336 else
3337 return atype;
3339 *a = suba;
3340 *b = subb;
3341 return wider_type;
3344 /* Determines the expression by that USE is expressed from induction variable
3345 CAND at statement AT in LOOP. The expression is stored in a decomposed
3346 form into AFF. Returns false if USE cannot be expressed using CAND. */
3348 static bool
3349 get_computation_aff (struct loop *loop,
3350 struct iv_use *use, struct iv_cand *cand, gimple at,
3351 struct aff_tree *aff)
3353 tree ubase = use->iv->base;
3354 tree ustep = use->iv->step;
3355 tree cbase = cand->iv->base;
3356 tree cstep = cand->iv->step, cstep_common;
3357 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3358 tree common_type, var;
3359 tree uutype;
3360 aff_tree cbase_aff, var_aff;
3361 widest_int rat;
3363 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3365 /* We do not have a precision to express the values of use. */
3366 return false;
3369 var = var_at_stmt (loop, cand, at);
3370 uutype = unsigned_type_for (utype);
3372 /* If the conversion is not noop, perform it. */
3373 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3375 cstep = fold_convert (uutype, cstep);
3376 cbase = fold_convert (uutype, cbase);
3377 var = fold_convert (uutype, var);
3380 if (!constant_multiple_of (ustep, cstep, &rat))
3381 return false;
3383 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3384 type, we achieve better folding by computing their difference in this
3385 wider type, and cast the result to UUTYPE. We do not need to worry about
3386 overflows, as all the arithmetics will in the end be performed in UUTYPE
3387 anyway. */
3388 common_type = determine_common_wider_type (&ubase, &cbase);
3390 /* use = ubase - ratio * cbase + ratio * var. */
3391 tree_to_aff_combination (ubase, common_type, aff);
3392 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3393 tree_to_aff_combination (var, uutype, &var_aff);
3395 /* We need to shift the value if we are after the increment. */
3396 if (stmt_after_increment (loop, cand, at))
3398 aff_tree cstep_aff;
3400 if (common_type != uutype)
3401 cstep_common = fold_convert (common_type, cstep);
3402 else
3403 cstep_common = cstep;
3405 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3406 aff_combination_add (&cbase_aff, &cstep_aff);
3409 aff_combination_scale (&cbase_aff, -rat);
3410 aff_combination_add (aff, &cbase_aff);
3411 if (common_type != uutype)
3412 aff_combination_convert (aff, uutype);
3414 aff_combination_scale (&var_aff, rat);
3415 aff_combination_add (aff, &var_aff);
3417 return true;
3420 /* Return the type of USE. */
3422 static tree
3423 get_use_type (struct iv_use *use)
3425 tree base_type = TREE_TYPE (use->iv->base);
3426 tree type;
3428 if (use->type == USE_ADDRESS)
3430 /* The base_type may be a void pointer. Create a pointer type based on
3431 the mem_ref instead. */
3432 type = build_pointer_type (TREE_TYPE (*use->op_p));
3433 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3434 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3436 else
3437 type = base_type;
3439 return type;
3442 /* Determines the expression by that USE is expressed from induction variable
3443 CAND at statement AT in LOOP. The computation is unshared. */
3445 static tree
3446 get_computation_at (struct loop *loop,
3447 struct iv_use *use, struct iv_cand *cand, gimple at)
3449 aff_tree aff;
3450 tree type = get_use_type (use);
3452 if (!get_computation_aff (loop, use, cand, at, &aff))
3453 return NULL_TREE;
3454 unshare_aff_combination (&aff);
3455 return fold_convert (type, aff_combination_to_tree (&aff));
3458 /* Determines the expression by that USE is expressed from induction variable
3459 CAND in LOOP. The computation is unshared. */
3461 static tree
3462 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3464 return get_computation_at (loop, use, cand, use->stmt);
3467 /* Adjust the cost COST for being in loop setup rather than loop body.
3468 If we're optimizing for space, the loop setup overhead is constant;
3469 if we're optimizing for speed, amortize it over the per-iteration cost. */
3470 static unsigned
3471 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3473 if (cost == INFTY)
3474 return cost;
3475 else if (optimize_loop_for_speed_p (data->current_loop))
3476 return cost / avg_loop_niter (data->current_loop);
3477 else
3478 return cost;
3481 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3482 validity for a memory reference accessing memory of mode MODE in
3483 address space AS. */
3486 bool
3487 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3488 addr_space_t as)
3490 #define MAX_RATIO 128
3491 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3492 static vec<sbitmap> valid_mult_list;
3493 sbitmap valid_mult;
3495 if (data_index >= valid_mult_list.length ())
3496 valid_mult_list.safe_grow_cleared (data_index + 1);
3498 valid_mult = valid_mult_list[data_index];
3499 if (!valid_mult)
3501 machine_mode address_mode = targetm.addr_space.address_mode (as);
3502 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3503 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3504 rtx addr, scaled;
3505 HOST_WIDE_INT i;
3507 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3508 bitmap_clear (valid_mult);
3509 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3510 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3511 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3513 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3514 if (memory_address_addr_space_p (mode, addr, as)
3515 || memory_address_addr_space_p (mode, scaled, as))
3516 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3521 fprintf (dump_file, " allowed multipliers:");
3522 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3523 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3524 fprintf (dump_file, " %d", (int) i);
3525 fprintf (dump_file, "\n");
3526 fprintf (dump_file, "\n");
3529 valid_mult_list[data_index] = valid_mult;
3532 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3533 return false;
3535 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3538 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3539 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3540 variable is omitted. Compute the cost for a memory reference that accesses
3541 a memory location of mode MEM_MODE in address space AS.
3543 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3544 size of MEM_MODE / RATIO) is available. To make this determination, we
3545 look at the size of the increment to be made, which is given in CSTEP.
3546 CSTEP may be zero if the step is unknown.
3547 STMT_AFTER_INC is true iff the statement we're looking at is after the
3548 increment of the original biv.
3550 TODO -- there must be some better way. This all is quite crude. */
3552 enum ainc_type
3554 AINC_PRE_INC, /* Pre increment. */
3555 AINC_PRE_DEC, /* Pre decrement. */
3556 AINC_POST_INC, /* Post increment. */
3557 AINC_POST_DEC, /* Post decrement. */
3558 AINC_NONE /* Also the number of auto increment types. */
3561 typedef struct address_cost_data_s
3563 HOST_WIDE_INT min_offset, max_offset;
3564 unsigned costs[2][2][2][2];
3565 unsigned ainc_costs[AINC_NONE];
3566 } *address_cost_data;
3569 static comp_cost
3570 get_address_cost (bool symbol_present, bool var_present,
3571 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3572 HOST_WIDE_INT cstep, machine_mode mem_mode,
3573 addr_space_t as, bool speed,
3574 bool stmt_after_inc, bool *may_autoinc)
3576 machine_mode address_mode = targetm.addr_space.address_mode (as);
3577 static vec<address_cost_data> address_cost_data_list;
3578 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3579 address_cost_data data;
3580 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3581 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3582 unsigned cost, acost, complexity;
3583 enum ainc_type autoinc_type;
3584 bool offset_p, ratio_p, autoinc;
3585 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3586 unsigned HOST_WIDE_INT mask;
3587 unsigned bits;
3589 if (data_index >= address_cost_data_list.length ())
3590 address_cost_data_list.safe_grow_cleared (data_index + 1);
3592 data = address_cost_data_list[data_index];
3593 if (!data)
3595 HOST_WIDE_INT i;
3596 HOST_WIDE_INT rat, off = 0;
3597 int old_cse_not_expected, width;
3598 unsigned sym_p, var_p, off_p, rat_p, add_c;
3599 rtx_insn *seq;
3600 rtx addr, base;
3601 rtx reg0, reg1;
3603 data = (address_cost_data) xcalloc (1, sizeof (*data));
3605 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3607 width = GET_MODE_BITSIZE (address_mode) - 1;
3608 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3609 width = HOST_BITS_PER_WIDE_INT - 1;
3610 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3612 for (i = width; i >= 0; i--)
3614 off = -((unsigned HOST_WIDE_INT) 1 << i);
3615 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3616 if (memory_address_addr_space_p (mem_mode, addr, as))
3617 break;
3619 data->min_offset = (i == -1? 0 : off);
3621 for (i = width; i >= 0; i--)
3623 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3624 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3625 if (memory_address_addr_space_p (mem_mode, addr, as))
3626 break;
3627 /* For some strict-alignment targets, the offset must be naturally
3628 aligned. Try an aligned offset if mem_mode is not QImode. */
3629 off = mem_mode != QImode
3630 ? ((unsigned HOST_WIDE_INT) 1 << i)
3631 - GET_MODE_SIZE (mem_mode)
3632 : 0;
3633 if (off > 0)
3635 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3636 if (memory_address_addr_space_p (mem_mode, addr, as))
3637 break;
3640 if (i == -1)
3641 off = 0;
3642 data->max_offset = off;
3644 if (dump_file && (dump_flags & TDF_DETAILS))
3646 fprintf (dump_file, "get_address_cost:\n");
3647 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3648 GET_MODE_NAME (mem_mode),
3649 data->min_offset);
3650 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3651 GET_MODE_NAME (mem_mode),
3652 data->max_offset);
3655 rat = 1;
3656 for (i = 2; i <= MAX_RATIO; i++)
3657 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3659 rat = i;
3660 break;
3663 /* Compute the cost of various addressing modes. */
3664 acost = 0;
3665 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3666 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3668 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3669 || USE_STORE_PRE_DECREMENT (mem_mode))
3671 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3672 has_predec[mem_mode]
3673 = memory_address_addr_space_p (mem_mode, addr, as);
3675 if (has_predec[mem_mode])
3676 data->ainc_costs[AINC_PRE_DEC]
3677 = address_cost (addr, mem_mode, as, speed);
3679 if (USE_LOAD_POST_DECREMENT (mem_mode)
3680 || USE_STORE_POST_DECREMENT (mem_mode))
3682 addr = gen_rtx_POST_DEC (address_mode, reg0);
3683 has_postdec[mem_mode]
3684 = memory_address_addr_space_p (mem_mode, addr, as);
3686 if (has_postdec[mem_mode])
3687 data->ainc_costs[AINC_POST_DEC]
3688 = address_cost (addr, mem_mode, as, speed);
3690 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3691 || USE_STORE_PRE_DECREMENT (mem_mode))
3693 addr = gen_rtx_PRE_INC (address_mode, reg0);
3694 has_preinc[mem_mode]
3695 = memory_address_addr_space_p (mem_mode, addr, as);
3697 if (has_preinc[mem_mode])
3698 data->ainc_costs[AINC_PRE_INC]
3699 = address_cost (addr, mem_mode, as, speed);
3701 if (USE_LOAD_POST_INCREMENT (mem_mode)
3702 || USE_STORE_POST_INCREMENT (mem_mode))
3704 addr = gen_rtx_POST_INC (address_mode, reg0);
3705 has_postinc[mem_mode]
3706 = memory_address_addr_space_p (mem_mode, addr, as);
3708 if (has_postinc[mem_mode])
3709 data->ainc_costs[AINC_POST_INC]
3710 = address_cost (addr, mem_mode, as, speed);
3712 for (i = 0; i < 16; i++)
3714 sym_p = i & 1;
3715 var_p = (i >> 1) & 1;
3716 off_p = (i >> 2) & 1;
3717 rat_p = (i >> 3) & 1;
3719 addr = reg0;
3720 if (rat_p)
3721 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3722 gen_int_mode (rat, address_mode));
3724 if (var_p)
3725 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3727 if (sym_p)
3729 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3730 /* ??? We can run into trouble with some backends by presenting
3731 it with symbols which haven't been properly passed through
3732 targetm.encode_section_info. By setting the local bit, we
3733 enhance the probability of things working. */
3734 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3736 if (off_p)
3737 base = gen_rtx_fmt_e (CONST, address_mode,
3738 gen_rtx_fmt_ee
3739 (PLUS, address_mode, base,
3740 gen_int_mode (off, address_mode)));
3742 else if (off_p)
3743 base = gen_int_mode (off, address_mode);
3744 else
3745 base = NULL_RTX;
3747 if (base)
3748 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3750 start_sequence ();
3751 /* To avoid splitting addressing modes, pretend that no cse will
3752 follow. */
3753 old_cse_not_expected = cse_not_expected;
3754 cse_not_expected = true;
3755 addr = memory_address_addr_space (mem_mode, addr, as);
3756 cse_not_expected = old_cse_not_expected;
3757 seq = get_insns ();
3758 end_sequence ();
3760 acost = seq_cost (seq, speed);
3761 acost += address_cost (addr, mem_mode, as, speed);
3763 if (!acost)
3764 acost = 1;
3765 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3768 /* On some targets, it is quite expensive to load symbol to a register,
3769 which makes addresses that contain symbols look much more expensive.
3770 However, the symbol will have to be loaded in any case before the
3771 loop (and quite likely we have it in register already), so it does not
3772 make much sense to penalize them too heavily. So make some final
3773 tweaks for the SYMBOL_PRESENT modes:
3775 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3776 var is cheaper, use this mode with small penalty.
3777 If VAR_PRESENT is true, try whether the mode with
3778 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3779 if this is the case, use it. */
3780 add_c = add_cost (speed, address_mode);
3781 for (i = 0; i < 8; i++)
3783 var_p = i & 1;
3784 off_p = (i >> 1) & 1;
3785 rat_p = (i >> 2) & 1;
3787 acost = data->costs[0][1][off_p][rat_p] + 1;
3788 if (var_p)
3789 acost += add_c;
3791 if (acost < data->costs[1][var_p][off_p][rat_p])
3792 data->costs[1][var_p][off_p][rat_p] = acost;
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, "Address costs:\n");
3799 for (i = 0; i < 16; i++)
3801 sym_p = i & 1;
3802 var_p = (i >> 1) & 1;
3803 off_p = (i >> 2) & 1;
3804 rat_p = (i >> 3) & 1;
3806 fprintf (dump_file, " ");
3807 if (sym_p)
3808 fprintf (dump_file, "sym + ");
3809 if (var_p)
3810 fprintf (dump_file, "var + ");
3811 if (off_p)
3812 fprintf (dump_file, "cst + ");
3813 if (rat_p)
3814 fprintf (dump_file, "rat * ");
3816 acost = data->costs[sym_p][var_p][off_p][rat_p];
3817 fprintf (dump_file, "index costs %d\n", acost);
3819 if (has_predec[mem_mode] || has_postdec[mem_mode]
3820 || has_preinc[mem_mode] || has_postinc[mem_mode])
3821 fprintf (dump_file, " May include autoinc/dec\n");
3822 fprintf (dump_file, "\n");
3825 address_cost_data_list[data_index] = data;
3828 bits = GET_MODE_BITSIZE (address_mode);
3829 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3830 offset &= mask;
3831 if ((offset >> (bits - 1) & 1))
3832 offset |= ~mask;
3833 s_offset = offset;
3835 autoinc = false;
3836 autoinc_type = AINC_NONE;
3837 msize = GET_MODE_SIZE (mem_mode);
3838 autoinc_offset = offset;
3839 if (stmt_after_inc)
3840 autoinc_offset += ratio * cstep;
3841 if (symbol_present || var_present || ratio != 1)
3842 autoinc = false;
3843 else
3845 if (has_postinc[mem_mode] && autoinc_offset == 0
3846 && msize == cstep)
3847 autoinc_type = AINC_POST_INC;
3848 else if (has_postdec[mem_mode] && autoinc_offset == 0
3849 && msize == -cstep)
3850 autoinc_type = AINC_POST_DEC;
3851 else if (has_preinc[mem_mode] && autoinc_offset == msize
3852 && msize == cstep)
3853 autoinc_type = AINC_PRE_INC;
3854 else if (has_predec[mem_mode] && autoinc_offset == -msize
3855 && msize == -cstep)
3856 autoinc_type = AINC_PRE_DEC;
3858 if (autoinc_type != AINC_NONE)
3859 autoinc = true;
3862 cost = 0;
3863 offset_p = (s_offset != 0
3864 && data->min_offset <= s_offset
3865 && s_offset <= data->max_offset);
3866 ratio_p = (ratio != 1
3867 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3869 if (ratio != 1 && !ratio_p)
3870 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3872 if (s_offset && !offset_p && !symbol_present)
3873 cost += add_cost (speed, address_mode);
3875 if (may_autoinc)
3876 *may_autoinc = autoinc;
3877 if (autoinc)
3878 acost = data->ainc_costs[autoinc_type];
3879 else
3880 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3881 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3882 return new_cost (cost + acost, complexity);
3885 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3886 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3887 calculating the operands of EXPR. Returns true if successful, and returns
3888 the cost in COST. */
3890 static bool
3891 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3892 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3894 comp_cost res;
3895 tree op1 = TREE_OPERAND (expr, 1);
3896 tree cst = TREE_OPERAND (mult, 1);
3897 tree multop = TREE_OPERAND (mult, 0);
3898 int m = exact_log2 (int_cst_value (cst));
3899 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3900 int as_cost, sa_cost;
3901 bool mult_in_op1;
3903 if (!(m >= 0 && m < maxm))
3904 return false;
3906 mult_in_op1 = operand_equal_p (op1, mult, 0);
3908 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3910 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3911 use that in preference to a shift insn followed by an add insn. */
3912 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3913 ? shiftadd_cost (speed, mode, m)
3914 : (mult_in_op1
3915 ? shiftsub1_cost (speed, mode, m)
3916 : shiftsub0_cost (speed, mode, m)));
3918 res = new_cost (MIN (as_cost, sa_cost), 0);
3919 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3921 STRIP_NOPS (multop);
3922 if (!is_gimple_val (multop))
3923 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3925 *cost = res;
3926 return true;
3929 /* Estimates cost of forcing expression EXPR into a variable. */
3931 static comp_cost
3932 force_expr_to_var_cost (tree expr, bool speed)
3934 static bool costs_initialized = false;
3935 static unsigned integer_cost [2];
3936 static unsigned symbol_cost [2];
3937 static unsigned address_cost [2];
3938 tree op0, op1;
3939 comp_cost cost0, cost1, cost;
3940 machine_mode mode;
3942 if (!costs_initialized)
3944 tree type = build_pointer_type (integer_type_node);
3945 tree var, addr;
3946 rtx x;
3947 int i;
3949 var = create_tmp_var_raw (integer_type_node, "test_var");
3950 TREE_STATIC (var) = 1;
3951 x = produce_memory_decl_rtl (var, NULL);
3952 SET_DECL_RTL (var, x);
3954 addr = build1 (ADDR_EXPR, type, var);
3957 for (i = 0; i < 2; i++)
3959 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3960 2000), i);
3962 symbol_cost[i] = computation_cost (addr, i) + 1;
3964 address_cost[i]
3965 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3969 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3970 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3971 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3972 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3973 fprintf (dump_file, "\n");
3977 costs_initialized = true;
3980 STRIP_NOPS (expr);
3982 if (SSA_VAR_P (expr))
3983 return no_cost;
3985 if (is_gimple_min_invariant (expr))
3987 if (TREE_CODE (expr) == INTEGER_CST)
3988 return new_cost (integer_cost [speed], 0);
3990 if (TREE_CODE (expr) == ADDR_EXPR)
3992 tree obj = TREE_OPERAND (expr, 0);
3994 if (TREE_CODE (obj) == VAR_DECL
3995 || TREE_CODE (obj) == PARM_DECL
3996 || TREE_CODE (obj) == RESULT_DECL)
3997 return new_cost (symbol_cost [speed], 0);
4000 return new_cost (address_cost [speed], 0);
4003 switch (TREE_CODE (expr))
4005 case POINTER_PLUS_EXPR:
4006 case PLUS_EXPR:
4007 case MINUS_EXPR:
4008 case MULT_EXPR:
4009 op0 = TREE_OPERAND (expr, 0);
4010 op1 = TREE_OPERAND (expr, 1);
4011 STRIP_NOPS (op0);
4012 STRIP_NOPS (op1);
4013 break;
4015 CASE_CONVERT:
4016 case NEGATE_EXPR:
4017 op0 = TREE_OPERAND (expr, 0);
4018 STRIP_NOPS (op0);
4019 op1 = NULL_TREE;
4020 break;
4022 default:
4023 /* Just an arbitrary value, FIXME. */
4024 return new_cost (target_spill_cost[speed], 0);
4027 if (op0 == NULL_TREE
4028 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4029 cost0 = no_cost;
4030 else
4031 cost0 = force_expr_to_var_cost (op0, speed);
4033 if (op1 == NULL_TREE
4034 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4035 cost1 = no_cost;
4036 else
4037 cost1 = force_expr_to_var_cost (op1, speed);
4039 mode = TYPE_MODE (TREE_TYPE (expr));
4040 switch (TREE_CODE (expr))
4042 case POINTER_PLUS_EXPR:
4043 case PLUS_EXPR:
4044 case MINUS_EXPR:
4045 case NEGATE_EXPR:
4046 cost = new_cost (add_cost (speed, mode), 0);
4047 if (TREE_CODE (expr) != NEGATE_EXPR)
4049 tree mult = NULL_TREE;
4050 comp_cost sa_cost;
4051 if (TREE_CODE (op1) == MULT_EXPR)
4052 mult = op1;
4053 else if (TREE_CODE (op0) == MULT_EXPR)
4054 mult = op0;
4056 if (mult != NULL_TREE
4057 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4058 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4059 speed, &sa_cost))
4060 return sa_cost;
4062 break;
4064 CASE_CONVERT:
4066 tree inner_mode, outer_mode;
4067 outer_mode = TREE_TYPE (expr);
4068 inner_mode = TREE_TYPE (op0);
4069 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4070 TYPE_MODE (inner_mode), speed), 0);
4072 break;
4074 case MULT_EXPR:
4075 if (cst_and_fits_in_hwi (op0))
4076 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4077 mode, speed), 0);
4078 else if (cst_and_fits_in_hwi (op1))
4079 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4080 mode, speed), 0);
4081 else
4082 return new_cost (target_spill_cost [speed], 0);
4083 break;
4085 default:
4086 gcc_unreachable ();
4089 cost = add_costs (cost, cost0);
4090 cost = add_costs (cost, cost1);
4092 /* Bound the cost by target_spill_cost. The parts of complicated
4093 computations often are either loop invariant or at least can
4094 be shared between several iv uses, so letting this grow without
4095 limits would not give reasonable results. */
4096 if (cost.cost > (int) target_spill_cost [speed])
4097 cost.cost = target_spill_cost [speed];
4099 return cost;
4102 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4103 invariants the computation depends on. */
4105 static comp_cost
4106 force_var_cost (struct ivopts_data *data,
4107 tree expr, bitmap *depends_on)
4109 if (depends_on)
4111 fd_ivopts_data = data;
4112 walk_tree (&expr, find_depends, depends_on, NULL);
4115 return force_expr_to_var_cost (expr, data->speed);
4118 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4119 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4120 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4121 invariants the computation depends on. */
4123 static comp_cost
4124 split_address_cost (struct ivopts_data *data,
4125 tree addr, bool *symbol_present, bool *var_present,
4126 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4128 tree core;
4129 HOST_WIDE_INT bitsize;
4130 HOST_WIDE_INT bitpos;
4131 tree toffset;
4132 machine_mode mode;
4133 int unsignedp, volatilep;
4135 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4136 &unsignedp, &volatilep, false);
4138 if (toffset != 0
4139 || bitpos % BITS_PER_UNIT != 0
4140 || TREE_CODE (core) != VAR_DECL)
4142 *symbol_present = false;
4143 *var_present = true;
4144 fd_ivopts_data = data;
4145 walk_tree (&addr, find_depends, depends_on, NULL);
4146 return new_cost (target_spill_cost[data->speed], 0);
4149 *offset += bitpos / BITS_PER_UNIT;
4150 if (TREE_STATIC (core)
4151 || DECL_EXTERNAL (core))
4153 *symbol_present = true;
4154 *var_present = false;
4155 return no_cost;
4158 *symbol_present = false;
4159 *var_present = true;
4160 return no_cost;
4163 /* Estimates cost of expressing difference of addresses E1 - E2 as
4164 var + symbol + offset. The value of offset is added to OFFSET,
4165 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4166 part is missing. DEPENDS_ON is a set of the invariants the computation
4167 depends on. */
4169 static comp_cost
4170 ptr_difference_cost (struct ivopts_data *data,
4171 tree e1, tree e2, bool *symbol_present, bool *var_present,
4172 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4174 HOST_WIDE_INT diff = 0;
4175 aff_tree aff_e1, aff_e2;
4176 tree type;
4178 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4180 if (ptr_difference_const (e1, e2, &diff))
4182 *offset += diff;
4183 *symbol_present = false;
4184 *var_present = false;
4185 return no_cost;
4188 if (integer_zerop (e2))
4189 return split_address_cost (data, TREE_OPERAND (e1, 0),
4190 symbol_present, var_present, offset, depends_on);
4192 *symbol_present = false;
4193 *var_present = true;
4195 type = signed_type_for (TREE_TYPE (e1));
4196 tree_to_aff_combination (e1, type, &aff_e1);
4197 tree_to_aff_combination (e2, type, &aff_e2);
4198 aff_combination_scale (&aff_e2, -1);
4199 aff_combination_add (&aff_e1, &aff_e2);
4201 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4204 /* Estimates cost of expressing difference E1 - E2 as
4205 var + symbol + offset. The value of offset is added to OFFSET,
4206 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4207 part is missing. DEPENDS_ON is a set of the invariants the computation
4208 depends on. */
4210 static comp_cost
4211 difference_cost (struct ivopts_data *data,
4212 tree e1, tree e2, bool *symbol_present, bool *var_present,
4213 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4215 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4216 unsigned HOST_WIDE_INT off1, off2;
4217 aff_tree aff_e1, aff_e2;
4218 tree type;
4220 e1 = strip_offset (e1, &off1);
4221 e2 = strip_offset (e2, &off2);
4222 *offset += off1 - off2;
4224 STRIP_NOPS (e1);
4225 STRIP_NOPS (e2);
4227 if (TREE_CODE (e1) == ADDR_EXPR)
4228 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4229 offset, depends_on);
4230 *symbol_present = false;
4232 if (operand_equal_p (e1, e2, 0))
4234 *var_present = false;
4235 return no_cost;
4238 *var_present = true;
4240 if (integer_zerop (e2))
4241 return force_var_cost (data, e1, depends_on);
4243 if (integer_zerop (e1))
4245 comp_cost cost = force_var_cost (data, e2, depends_on);
4246 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4247 return cost;
4250 type = signed_type_for (TREE_TYPE (e1));
4251 tree_to_aff_combination (e1, type, &aff_e1);
4252 tree_to_aff_combination (e2, type, &aff_e2);
4253 aff_combination_scale (&aff_e2, -1);
4254 aff_combination_add (&aff_e1, &aff_e2);
4256 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4259 /* Returns true if AFF1 and AFF2 are identical. */
4261 static bool
4262 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4264 unsigned i;
4266 if (aff1->n != aff2->n)
4267 return false;
4269 for (i = 0; i < aff1->n; i++)
4271 if (aff1->elts[i].coef != aff2->elts[i].coef)
4272 return false;
4274 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4275 return false;
4277 return true;
4280 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4282 static int
4283 get_expr_id (struct ivopts_data *data, tree expr)
4285 struct iv_inv_expr_ent ent;
4286 struct iv_inv_expr_ent **slot;
4288 ent.expr = expr;
4289 ent.hash = iterative_hash_expr (expr, 0);
4290 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4291 if (*slot)
4292 return (*slot)->id;
4294 *slot = XNEW (struct iv_inv_expr_ent);
4295 (*slot)->expr = expr;
4296 (*slot)->hash = ent.hash;
4297 (*slot)->id = data->inv_expr_id++;
4298 return (*slot)->id;
4301 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4302 requires a new compiler generated temporary. Returns -1 otherwise.
4303 ADDRESS_P is a flag indicating if the expression is for address
4304 computation. */
4306 static int
4307 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4308 tree cbase, HOST_WIDE_INT ratio,
4309 bool address_p)
4311 aff_tree ubase_aff, cbase_aff;
4312 tree expr, ub, cb;
4314 STRIP_NOPS (ubase);
4315 STRIP_NOPS (cbase);
4316 ub = ubase;
4317 cb = cbase;
4319 if ((TREE_CODE (ubase) == INTEGER_CST)
4320 && (TREE_CODE (cbase) == INTEGER_CST))
4321 return -1;
4323 /* Strips the constant part. */
4324 if (TREE_CODE (ubase) == PLUS_EXPR
4325 || TREE_CODE (ubase) == MINUS_EXPR
4326 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4328 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4329 ubase = TREE_OPERAND (ubase, 0);
4332 /* Strips the constant part. */
4333 if (TREE_CODE (cbase) == PLUS_EXPR
4334 || TREE_CODE (cbase) == MINUS_EXPR
4335 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4337 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4338 cbase = TREE_OPERAND (cbase, 0);
4341 if (address_p)
4343 if (((TREE_CODE (ubase) == SSA_NAME)
4344 || (TREE_CODE (ubase) == ADDR_EXPR
4345 && is_gimple_min_invariant (ubase)))
4346 && (TREE_CODE (cbase) == INTEGER_CST))
4347 return -1;
4349 if (((TREE_CODE (cbase) == SSA_NAME)
4350 || (TREE_CODE (cbase) == ADDR_EXPR
4351 && is_gimple_min_invariant (cbase)))
4352 && (TREE_CODE (ubase) == INTEGER_CST))
4353 return -1;
4356 if (ratio == 1)
4358 if (operand_equal_p (ubase, cbase, 0))
4359 return -1;
4361 if (TREE_CODE (ubase) == ADDR_EXPR
4362 && TREE_CODE (cbase) == ADDR_EXPR)
4364 tree usym, csym;
4366 usym = TREE_OPERAND (ubase, 0);
4367 csym = TREE_OPERAND (cbase, 0);
4368 if (TREE_CODE (usym) == ARRAY_REF)
4370 tree ind = TREE_OPERAND (usym, 1);
4371 if (TREE_CODE (ind) == INTEGER_CST
4372 && tree_fits_shwi_p (ind)
4373 && tree_to_shwi (ind) == 0)
4374 usym = TREE_OPERAND (usym, 0);
4376 if (TREE_CODE (csym) == ARRAY_REF)
4378 tree ind = TREE_OPERAND (csym, 1);
4379 if (TREE_CODE (ind) == INTEGER_CST
4380 && tree_fits_shwi_p (ind)
4381 && tree_to_shwi (ind) == 0)
4382 csym = TREE_OPERAND (csym, 0);
4384 if (operand_equal_p (usym, csym, 0))
4385 return -1;
4387 /* Now do more complex comparison */
4388 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4389 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4390 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4391 return -1;
4394 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4395 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4397 aff_combination_scale (&cbase_aff, -1 * ratio);
4398 aff_combination_add (&ubase_aff, &cbase_aff);
4399 expr = aff_combination_to_tree (&ubase_aff);
4400 return get_expr_id (data, expr);
4405 /* Determines the cost of the computation by that USE is expressed
4406 from induction variable CAND. If ADDRESS_P is true, we just need
4407 to create an address from it, otherwise we want to get it into
4408 register. A set of invariants we depend on is stored in
4409 DEPENDS_ON. AT is the statement at that the value is computed.
4410 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4411 addressing is likely. */
4413 static comp_cost
4414 get_computation_cost_at (struct ivopts_data *data,
4415 struct iv_use *use, struct iv_cand *cand,
4416 bool address_p, bitmap *depends_on, gimple at,
4417 bool *can_autoinc,
4418 int *inv_expr_id)
4420 tree ubase = use->iv->base, ustep = use->iv->step;
4421 tree cbase, cstep;
4422 tree utype = TREE_TYPE (ubase), ctype;
4423 unsigned HOST_WIDE_INT cstepi, offset = 0;
4424 HOST_WIDE_INT ratio, aratio;
4425 bool var_present, symbol_present, stmt_is_after_inc;
4426 comp_cost cost;
4427 widest_int rat;
4428 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4429 machine_mode mem_mode = (address_p
4430 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4431 : VOIDmode);
4433 *depends_on = NULL;
4435 /* Only consider real candidates. */
4436 if (!cand->iv)
4437 return infinite_cost;
4439 cbase = cand->iv->base;
4440 cstep = cand->iv->step;
4441 ctype = TREE_TYPE (cbase);
4443 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4445 /* We do not have a precision to express the values of use. */
4446 return infinite_cost;
4449 if (address_p
4450 || (use->iv->base_object
4451 && cand->iv->base_object
4452 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4453 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4455 /* Do not try to express address of an object with computation based
4456 on address of a different object. This may cause problems in rtl
4457 level alias analysis (that does not expect this to be happening,
4458 as this is illegal in C), and would be unlikely to be useful
4459 anyway. */
4460 if (use->iv->base_object
4461 && cand->iv->base_object
4462 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4463 return infinite_cost;
4466 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4468 /* TODO -- add direct handling of this case. */
4469 goto fallback;
4472 /* CSTEPI is removed from the offset in case statement is after the
4473 increment. If the step is not constant, we use zero instead.
4474 This is a bit imprecise (there is the extra addition), but
4475 redundancy elimination is likely to transform the code so that
4476 it uses value of the variable before increment anyway,
4477 so it is not that much unrealistic. */
4478 if (cst_and_fits_in_hwi (cstep))
4479 cstepi = int_cst_value (cstep);
4480 else
4481 cstepi = 0;
4483 if (!constant_multiple_of (ustep, cstep, &rat))
4484 return infinite_cost;
4486 if (wi::fits_shwi_p (rat))
4487 ratio = rat.to_shwi ();
4488 else
4489 return infinite_cost;
4491 STRIP_NOPS (cbase);
4492 ctype = TREE_TYPE (cbase);
4494 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4496 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4497 or ratio == 1, it is better to handle this like
4499 ubase - ratio * cbase + ratio * var
4501 (also holds in the case ratio == -1, TODO. */
4503 if (cst_and_fits_in_hwi (cbase))
4505 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4506 cost = difference_cost (data,
4507 ubase, build_int_cst (utype, 0),
4508 &symbol_present, &var_present, &offset,
4509 depends_on);
4510 cost.cost /= avg_loop_niter (data->current_loop);
4512 else if (ratio == 1)
4514 tree real_cbase = cbase;
4516 /* Check to see if any adjustment is needed. */
4517 if (cstepi == 0 && stmt_is_after_inc)
4519 aff_tree real_cbase_aff;
4520 aff_tree cstep_aff;
4522 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4523 &real_cbase_aff);
4524 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4526 aff_combination_add (&real_cbase_aff, &cstep_aff);
4527 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4530 cost = difference_cost (data,
4531 ubase, real_cbase,
4532 &symbol_present, &var_present, &offset,
4533 depends_on);
4534 cost.cost /= avg_loop_niter (data->current_loop);
4536 else if (address_p
4537 && !POINTER_TYPE_P (ctype)
4538 && multiplier_allowed_in_address_p
4539 (ratio, mem_mode,
4540 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4542 cbase
4543 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4544 cost = difference_cost (data,
4545 ubase, cbase,
4546 &symbol_present, &var_present, &offset,
4547 depends_on);
4548 cost.cost /= avg_loop_niter (data->current_loop);
4550 else
4552 cost = force_var_cost (data, cbase, depends_on);
4553 cost = add_costs (cost,
4554 difference_cost (data,
4555 ubase, build_int_cst (utype, 0),
4556 &symbol_present, &var_present,
4557 &offset, depends_on));
4558 cost.cost /= avg_loop_niter (data->current_loop);
4559 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4562 /* Set of invariants depended on by sub use has already been computed
4563 for the first use in the group. */
4564 if (use->sub_id)
4566 cost.cost = 0;
4567 if (depends_on && *depends_on)
4568 bitmap_clear (*depends_on);
4570 else if (inv_expr_id)
4572 *inv_expr_id =
4573 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4574 /* Clear depends on. */
4575 if (*inv_expr_id != -1 && depends_on && *depends_on)
4576 bitmap_clear (*depends_on);
4579 /* If we are after the increment, the value of the candidate is higher by
4580 one iteration. */
4581 if (stmt_is_after_inc)
4582 offset -= ratio * cstepi;
4584 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4585 (symbol/var1/const parts may be omitted). If we are looking for an
4586 address, find the cost of addressing this. */
4587 if (address_p)
4588 return add_costs (cost,
4589 get_address_cost (symbol_present, var_present,
4590 offset, ratio, cstepi,
4591 mem_mode,
4592 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4593 speed, stmt_is_after_inc,
4594 can_autoinc));
4596 /* Otherwise estimate the costs for computing the expression. */
4597 if (!symbol_present && !var_present && !offset)
4599 if (ratio != 1)
4600 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4601 return cost;
4604 /* Symbol + offset should be compile-time computable so consider that they
4605 are added once to the variable, if present. */
4606 if (var_present && (symbol_present || offset))
4607 cost.cost += adjust_setup_cost (data,
4608 add_cost (speed, TYPE_MODE (ctype)));
4610 /* Having offset does not affect runtime cost in case it is added to
4611 symbol, but it increases complexity. */
4612 if (offset)
4613 cost.complexity++;
4615 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4617 aratio = ratio > 0 ? ratio : -ratio;
4618 if (aratio != 1)
4619 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4620 return cost;
4622 fallback:
4623 if (can_autoinc)
4624 *can_autoinc = false;
4627 /* Just get the expression, expand it and measure the cost. */
4628 tree comp = get_computation_at (data->current_loop, use, cand, at);
4630 if (!comp)
4631 return infinite_cost;
4633 if (address_p)
4634 comp = build_simple_mem_ref (comp);
4636 return new_cost (computation_cost (comp, speed), 0);
4640 /* Determines the cost of the computation by that USE is expressed
4641 from induction variable CAND. If ADDRESS_P is true, we just need
4642 to create an address from it, otherwise we want to get it into
4643 register. A set of invariants we depend on is stored in
4644 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4645 autoinc addressing is likely. */
4647 static comp_cost
4648 get_computation_cost (struct ivopts_data *data,
4649 struct iv_use *use, struct iv_cand *cand,
4650 bool address_p, bitmap *depends_on,
4651 bool *can_autoinc, int *inv_expr_id)
4653 return get_computation_cost_at (data,
4654 use, cand, address_p, depends_on, use->stmt,
4655 can_autoinc, inv_expr_id);
4658 /* Determines cost of basing replacement of USE on CAND in a generic
4659 expression. */
4661 static bool
4662 determine_use_iv_cost_generic (struct ivopts_data *data,
4663 struct iv_use *use, struct iv_cand *cand)
4665 bitmap depends_on;
4666 comp_cost cost;
4667 int inv_expr_id = -1;
4669 /* The simple case first -- if we need to express value of the preserved
4670 original biv, the cost is 0. This also prevents us from counting the
4671 cost of increment twice -- once at this use and once in the cost of
4672 the candidate. */
4673 if (cand->pos == IP_ORIGINAL
4674 && cand->incremented_at == use->stmt)
4676 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4677 ERROR_MARK, -1);
4678 return true;
4681 cost = get_computation_cost (data, use, cand, false, &depends_on,
4682 NULL, &inv_expr_id);
4684 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4685 inv_expr_id);
4687 return !infinite_cost_p (cost);
4690 /* Determines cost of basing replacement of USE on CAND in an address. */
4692 static bool
4693 determine_use_iv_cost_address (struct ivopts_data *data,
4694 struct iv_use *use, struct iv_cand *cand)
4696 bitmap depends_on;
4697 bool can_autoinc;
4698 int inv_expr_id = -1;
4699 struct iv_use *sub_use;
4700 comp_cost sub_cost;
4701 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4702 &can_autoinc, &inv_expr_id);
4704 if (cand->ainc_use == use)
4706 if (can_autoinc)
4707 cost.cost -= cand->cost_step;
4708 /* If we generated the candidate solely for exploiting autoincrement
4709 opportunities, and it turns out it can't be used, set the cost to
4710 infinity to make sure we ignore it. */
4711 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4712 cost = infinite_cost;
4714 for (sub_use = use->next;
4715 sub_use && !infinite_cost_p (cost);
4716 sub_use = sub_use->next)
4718 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4719 &can_autoinc, &inv_expr_id);
4720 cost = add_costs (cost, sub_cost);
4723 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4724 inv_expr_id);
4726 return !infinite_cost_p (cost);
4729 /* Computes value of candidate CAND at position AT in iteration NITER, and
4730 stores it to VAL. */
4732 static void
4733 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4734 aff_tree *val)
4736 aff_tree step, delta, nit;
4737 struct iv *iv = cand->iv;
4738 tree type = TREE_TYPE (iv->base);
4739 tree steptype = type;
4740 if (POINTER_TYPE_P (type))
4741 steptype = sizetype;
4742 steptype = unsigned_type_for (type);
4744 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4745 aff_combination_convert (&step, steptype);
4746 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4747 aff_combination_convert (&nit, steptype);
4748 aff_combination_mult (&nit, &step, &delta);
4749 if (stmt_after_increment (loop, cand, at))
4750 aff_combination_add (&delta, &step);
4752 tree_to_aff_combination (iv->base, type, val);
4753 if (!POINTER_TYPE_P (type))
4754 aff_combination_convert (val, steptype);
4755 aff_combination_add (val, &delta);
4758 /* Returns period of induction variable iv. */
4760 static tree
4761 iv_period (struct iv *iv)
4763 tree step = iv->step, period, type;
4764 tree pow2div;
4766 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4768 type = unsigned_type_for (TREE_TYPE (step));
4769 /* Period of the iv is lcm (step, type_range)/step -1,
4770 i.e., N*type_range/step - 1. Since type range is power
4771 of two, N == (step >> num_of_ending_zeros_binary (step),
4772 so the final result is
4774 (type_range >> num_of_ending_zeros_binary (step)) - 1
4777 pow2div = num_ending_zeros (step);
4779 period = build_low_bits_mask (type,
4780 (TYPE_PRECISION (type)
4781 - tree_to_uhwi (pow2div)));
4783 return period;
4786 /* Returns the comparison operator used when eliminating the iv USE. */
4788 static enum tree_code
4789 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4791 struct loop *loop = data->current_loop;
4792 basic_block ex_bb;
4793 edge exit;
4795 ex_bb = gimple_bb (use->stmt);
4796 exit = EDGE_SUCC (ex_bb, 0);
4797 if (flow_bb_inside_loop_p (loop, exit->dest))
4798 exit = EDGE_SUCC (ex_bb, 1);
4800 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4803 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4804 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4805 calculation is performed in non-wrapping type.
4807 TODO: More generally, we could test for the situation that
4808 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4809 This would require knowing the sign of OFFSET. */
4811 static bool
4812 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4814 enum tree_code code;
4815 tree e1, e2;
4816 aff_tree aff_e1, aff_e2, aff_offset;
4818 if (!nowrap_type_p (TREE_TYPE (base)))
4819 return false;
4821 base = expand_simple_operations (base);
4823 if (TREE_CODE (base) == SSA_NAME)
4825 gimple stmt = SSA_NAME_DEF_STMT (base);
4827 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4828 return false;
4830 code = gimple_assign_rhs_code (stmt);
4831 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4832 return false;
4834 e1 = gimple_assign_rhs1 (stmt);
4835 e2 = gimple_assign_rhs2 (stmt);
4837 else
4839 code = TREE_CODE (base);
4840 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4841 return false;
4842 e1 = TREE_OPERAND (base, 0);
4843 e2 = TREE_OPERAND (base, 1);
4846 /* Use affine expansion as deeper inspection to prove the equality. */
4847 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4848 &aff_e2, &data->name_expansion_cache);
4849 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4850 &aff_offset, &data->name_expansion_cache);
4851 aff_combination_scale (&aff_offset, -1);
4852 switch (code)
4854 case PLUS_EXPR:
4855 aff_combination_add (&aff_e2, &aff_offset);
4856 if (aff_combination_zero_p (&aff_e2))
4857 return true;
4859 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4860 &aff_e1, &data->name_expansion_cache);
4861 aff_combination_add (&aff_e1, &aff_offset);
4862 return aff_combination_zero_p (&aff_e1);
4864 case POINTER_PLUS_EXPR:
4865 aff_combination_add (&aff_e2, &aff_offset);
4866 return aff_combination_zero_p (&aff_e2);
4868 default:
4869 return false;
4873 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4874 comparison with CAND. NITER describes the number of iterations of
4875 the loops. If successful, the comparison in COMP_P is altered accordingly.
4877 We aim to handle the following situation:
4879 sometype *base, *p;
4880 int a, b, i;
4882 i = a;
4883 p = p_0 = base + a;
4887 bla (*p);
4888 p++;
4889 i++;
4891 while (i < b);
4893 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4894 We aim to optimize this to
4896 p = p_0 = base + a;
4899 bla (*p);
4900 p++;
4902 while (p < p_0 - a + b);
4904 This preserves the correctness, since the pointer arithmetics does not
4905 overflow. More precisely:
4907 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4908 overflow in computing it or the values of p.
4909 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4910 overflow. To prove this, we use the fact that p_0 = base + a. */
4912 static bool
4913 iv_elimination_compare_lt (struct ivopts_data *data,
4914 struct iv_cand *cand, enum tree_code *comp_p,
4915 struct tree_niter_desc *niter)
4917 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4918 struct aff_tree nit, tmpa, tmpb;
4919 enum tree_code comp;
4920 HOST_WIDE_INT step;
4922 /* We need to know that the candidate induction variable does not overflow.
4923 While more complex analysis may be used to prove this, for now just
4924 check that the variable appears in the original program and that it
4925 is computed in a type that guarantees no overflows. */
4926 cand_type = TREE_TYPE (cand->iv->base);
4927 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4928 return false;
4930 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4931 the calculation of the BOUND could overflow, making the comparison
4932 invalid. */
4933 if (!data->loop_single_exit_p)
4934 return false;
4936 /* We need to be able to decide whether candidate is increasing or decreasing
4937 in order to choose the right comparison operator. */
4938 if (!cst_and_fits_in_hwi (cand->iv->step))
4939 return false;
4940 step = int_cst_value (cand->iv->step);
4942 /* Check that the number of iterations matches the expected pattern:
4943 a + 1 > b ? 0 : b - a - 1. */
4944 mbz = niter->may_be_zero;
4945 if (TREE_CODE (mbz) == GT_EXPR)
4947 /* Handle a + 1 > b. */
4948 tree op0 = TREE_OPERAND (mbz, 0);
4949 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4951 a = TREE_OPERAND (op0, 0);
4952 b = TREE_OPERAND (mbz, 1);
4954 else
4955 return false;
4957 else if (TREE_CODE (mbz) == LT_EXPR)
4959 tree op1 = TREE_OPERAND (mbz, 1);
4961 /* Handle b < a + 1. */
4962 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4964 a = TREE_OPERAND (op1, 0);
4965 b = TREE_OPERAND (mbz, 0);
4967 else
4968 return false;
4970 else
4971 return false;
4973 /* Expected number of iterations is B - A - 1. Check that it matches
4974 the actual number, i.e., that B - A - NITER = 1. */
4975 tree_to_aff_combination (niter->niter, nit_type, &nit);
4976 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4977 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4978 aff_combination_scale (&nit, -1);
4979 aff_combination_scale (&tmpa, -1);
4980 aff_combination_add (&tmpb, &tmpa);
4981 aff_combination_add (&tmpb, &nit);
4982 if (tmpb.n != 0 || tmpb.offset != 1)
4983 return false;
4985 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4986 overflow. */
4987 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4988 cand->iv->step,
4989 fold_convert (TREE_TYPE (cand->iv->step), a));
4990 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4991 return false;
4993 /* Determine the new comparison operator. */
4994 comp = step < 0 ? GT_EXPR : LT_EXPR;
4995 if (*comp_p == NE_EXPR)
4996 *comp_p = comp;
4997 else if (*comp_p == EQ_EXPR)
4998 *comp_p = invert_tree_comparison (comp, false);
4999 else
5000 gcc_unreachable ();
5002 return true;
5005 /* Check whether it is possible to express the condition in USE by comparison
5006 of candidate CAND. If so, store the value compared with to BOUND, and the
5007 comparison operator to COMP. */
5009 static bool
5010 may_eliminate_iv (struct ivopts_data *data,
5011 struct iv_use *use, struct iv_cand *cand, tree *bound,
5012 enum tree_code *comp)
5014 basic_block ex_bb;
5015 edge exit;
5016 tree period;
5017 struct loop *loop = data->current_loop;
5018 aff_tree bnd;
5019 struct tree_niter_desc *desc = NULL;
5021 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5022 return false;
5024 /* For now works only for exits that dominate the loop latch.
5025 TODO: extend to other conditions inside loop body. */
5026 ex_bb = gimple_bb (use->stmt);
5027 if (use->stmt != last_stmt (ex_bb)
5028 || gimple_code (use->stmt) != GIMPLE_COND
5029 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5030 return false;
5032 exit = EDGE_SUCC (ex_bb, 0);
5033 if (flow_bb_inside_loop_p (loop, exit->dest))
5034 exit = EDGE_SUCC (ex_bb, 1);
5035 if (flow_bb_inside_loop_p (loop, exit->dest))
5036 return false;
5038 desc = niter_for_exit (data, exit);
5039 if (!desc)
5040 return false;
5042 /* Determine whether we can use the variable to test the exit condition.
5043 This is the case iff the period of the induction variable is greater
5044 than the number of iterations for which the exit condition is true. */
5045 period = iv_period (cand->iv);
5047 /* If the number of iterations is constant, compare against it directly. */
5048 if (TREE_CODE (desc->niter) == INTEGER_CST)
5050 /* See cand_value_at. */
5051 if (stmt_after_increment (loop, cand, use->stmt))
5053 if (!tree_int_cst_lt (desc->niter, period))
5054 return false;
5056 else
5058 if (tree_int_cst_lt (period, desc->niter))
5059 return false;
5063 /* If not, and if this is the only possible exit of the loop, see whether
5064 we can get a conservative estimate on the number of iterations of the
5065 entire loop and compare against that instead. */
5066 else
5068 widest_int period_value, max_niter;
5070 max_niter = desc->max;
5071 if (stmt_after_increment (loop, cand, use->stmt))
5072 max_niter += 1;
5073 period_value = wi::to_widest (period);
5074 if (wi::gtu_p (max_niter, period_value))
5076 /* See if we can take advantage of inferred loop bound information. */
5077 if (data->loop_single_exit_p)
5079 if (!max_loop_iterations (loop, &max_niter))
5080 return false;
5081 /* The loop bound is already adjusted by adding 1. */
5082 if (wi::gtu_p (max_niter, period_value))
5083 return false;
5085 else
5086 return false;
5090 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5092 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5093 aff_combination_to_tree (&bnd));
5094 *comp = iv_elimination_compare (data, use);
5096 /* It is unlikely that computing the number of iterations using division
5097 would be more profitable than keeping the original induction variable. */
5098 if (expression_expensive_p (*bound))
5099 return false;
5101 /* Sometimes, it is possible to handle the situation that the number of
5102 iterations may be zero unless additional assumtions by using <
5103 instead of != in the exit condition.
5105 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5106 base the exit condition on it. However, that is often too
5107 expensive. */
5108 if (!integer_zerop (desc->may_be_zero))
5109 return iv_elimination_compare_lt (data, cand, comp, desc);
5111 return true;
5114 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5115 be copied, if is is used in the loop body and DATA->body_includes_call. */
5117 static int
5118 parm_decl_cost (struct ivopts_data *data, tree bound)
5120 tree sbound = bound;
5121 STRIP_NOPS (sbound);
5123 if (TREE_CODE (sbound) == SSA_NAME
5124 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5125 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5126 && data->body_includes_call)
5127 return COSTS_N_INSNS (1);
5129 return 0;
5132 /* Determines cost of basing replacement of USE on CAND in a condition. */
5134 static bool
5135 determine_use_iv_cost_condition (struct ivopts_data *data,
5136 struct iv_use *use, struct iv_cand *cand)
5138 tree bound = NULL_TREE;
5139 struct iv *cmp_iv;
5140 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5141 comp_cost elim_cost, express_cost, cost, bound_cost;
5142 bool ok;
5143 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5144 tree *control_var, *bound_cst;
5145 enum tree_code comp = ERROR_MARK;
5147 /* Only consider real candidates. */
5148 if (!cand->iv)
5150 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5151 ERROR_MARK, -1);
5152 return false;
5155 /* Try iv elimination. */
5156 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5158 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5159 if (elim_cost.cost == 0)
5160 elim_cost.cost = parm_decl_cost (data, bound);
5161 else if (TREE_CODE (bound) == INTEGER_CST)
5162 elim_cost.cost = 0;
5163 /* If we replace a loop condition 'i < n' with 'p < base + n',
5164 depends_on_elim will have 'base' and 'n' set, which implies
5165 that both 'base' and 'n' will be live during the loop. More likely,
5166 'base + n' will be loop invariant, resulting in only one live value
5167 during the loop. So in that case we clear depends_on_elim and set
5168 elim_inv_expr_id instead. */
5169 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5171 elim_inv_expr_id = get_expr_id (data, bound);
5172 bitmap_clear (depends_on_elim);
5174 /* The bound is a loop invariant, so it will be only computed
5175 once. */
5176 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5178 else
5179 elim_cost = infinite_cost;
5181 /* Try expressing the original giv. If it is compared with an invariant,
5182 note that we cannot get rid of it. */
5183 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5184 NULL, &cmp_iv);
5185 gcc_assert (ok);
5187 /* When the condition is a comparison of the candidate IV against
5188 zero, prefer this IV.
5190 TODO: The constant that we're subtracting from the cost should
5191 be target-dependent. This information should be added to the
5192 target costs for each backend. */
5193 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5194 && integer_zerop (*bound_cst)
5195 && (operand_equal_p (*control_var, cand->var_after, 0)
5196 || operand_equal_p (*control_var, cand->var_before, 0)))
5197 elim_cost.cost -= 1;
5199 express_cost = get_computation_cost (data, use, cand, false,
5200 &depends_on_express, NULL,
5201 &express_inv_expr_id);
5202 fd_ivopts_data = data;
5203 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5205 /* Count the cost of the original bound as well. */
5206 bound_cost = force_var_cost (data, *bound_cst, NULL);
5207 if (bound_cost.cost == 0)
5208 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5209 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5210 bound_cost.cost = 0;
5211 express_cost.cost += bound_cost.cost;
5213 /* Choose the better approach, preferring the eliminated IV. */
5214 if (compare_costs (elim_cost, express_cost) <= 0)
5216 cost = elim_cost;
5217 depends_on = depends_on_elim;
5218 depends_on_elim = NULL;
5219 inv_expr_id = elim_inv_expr_id;
5221 else
5223 cost = express_cost;
5224 depends_on = depends_on_express;
5225 depends_on_express = NULL;
5226 bound = NULL_TREE;
5227 comp = ERROR_MARK;
5228 inv_expr_id = express_inv_expr_id;
5231 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5233 if (depends_on_elim)
5234 BITMAP_FREE (depends_on_elim);
5235 if (depends_on_express)
5236 BITMAP_FREE (depends_on_express);
5238 return !infinite_cost_p (cost);
5241 /* Determines cost of basing replacement of USE on CAND. Returns false
5242 if USE cannot be based on CAND. */
5244 static bool
5245 determine_use_iv_cost (struct ivopts_data *data,
5246 struct iv_use *use, struct iv_cand *cand)
5248 switch (use->type)
5250 case USE_NONLINEAR_EXPR:
5251 return determine_use_iv_cost_generic (data, use, cand);
5253 case USE_ADDRESS:
5254 return determine_use_iv_cost_address (data, use, cand);
5256 case USE_COMPARE:
5257 return determine_use_iv_cost_condition (data, use, cand);
5259 default:
5260 gcc_unreachable ();
5264 /* Return true if get_computation_cost indicates that autoincrement is
5265 a possibility for the pair of USE and CAND, false otherwise. */
5267 static bool
5268 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5269 struct iv_cand *cand)
5271 bitmap depends_on;
5272 bool can_autoinc;
5273 comp_cost cost;
5275 if (use->type != USE_ADDRESS)
5276 return false;
5278 cost = get_computation_cost (data, use, cand, true, &depends_on,
5279 &can_autoinc, NULL);
5281 BITMAP_FREE (depends_on);
5283 return !infinite_cost_p (cost) && can_autoinc;
5286 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5287 use that allows autoincrement, and set their AINC_USE if possible. */
5289 static void
5290 set_autoinc_for_original_candidates (struct ivopts_data *data)
5292 unsigned i, j;
5294 for (i = 0; i < n_iv_cands (data); i++)
5296 struct iv_cand *cand = iv_cand (data, i);
5297 struct iv_use *closest_before = NULL;
5298 struct iv_use *closest_after = NULL;
5299 if (cand->pos != IP_ORIGINAL)
5300 continue;
5302 for (j = 0; j < n_iv_uses (data); j++)
5304 struct iv_use *use = iv_use (data, j);
5305 unsigned uid = gimple_uid (use->stmt);
5307 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5308 continue;
5310 if (uid < gimple_uid (cand->incremented_at)
5311 && (closest_before == NULL
5312 || uid > gimple_uid (closest_before->stmt)))
5313 closest_before = use;
5315 if (uid > gimple_uid (cand->incremented_at)
5316 && (closest_after == NULL
5317 || uid < gimple_uid (closest_after->stmt)))
5318 closest_after = use;
5321 if (closest_before != NULL
5322 && autoinc_possible_for_pair (data, closest_before, cand))
5323 cand->ainc_use = closest_before;
5324 else if (closest_after != NULL
5325 && autoinc_possible_for_pair (data, closest_after, cand))
5326 cand->ainc_use = closest_after;
5330 /* Finds the candidates for the induction variables. */
5332 static void
5333 find_iv_candidates (struct ivopts_data *data)
5335 /* Add commonly used ivs. */
5336 add_standard_iv_candidates (data);
5338 /* Add old induction variables. */
5339 add_old_ivs_candidates (data);
5341 /* Add induction variables derived from uses. */
5342 add_derived_ivs_candidates (data);
5344 set_autoinc_for_original_candidates (data);
5346 /* Record the important candidates. */
5347 record_important_candidates (data);
5350 /* Determines costs of basing the use of the iv on an iv candidate. */
5352 static void
5353 determine_use_iv_costs (struct ivopts_data *data)
5355 unsigned i, j;
5356 struct iv_use *use;
5357 struct iv_cand *cand;
5358 bitmap to_clear = BITMAP_ALLOC (NULL);
5360 alloc_use_cost_map (data);
5362 for (i = 0; i < n_iv_uses (data); i++)
5364 use = iv_use (data, i);
5366 if (data->consider_all_candidates)
5368 for (j = 0; j < n_iv_cands (data); j++)
5370 cand = iv_cand (data, j);
5371 determine_use_iv_cost (data, use, cand);
5374 else
5376 bitmap_iterator bi;
5378 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5380 cand = iv_cand (data, j);
5381 if (!determine_use_iv_cost (data, use, cand))
5382 bitmap_set_bit (to_clear, j);
5385 /* Remove the candidates for that the cost is infinite from
5386 the list of related candidates. */
5387 bitmap_and_compl_into (use->related_cands, to_clear);
5388 bitmap_clear (to_clear);
5392 BITMAP_FREE (to_clear);
5394 if (dump_file && (dump_flags & TDF_DETAILS))
5396 fprintf (dump_file, "Use-candidate costs:\n");
5398 for (i = 0; i < n_iv_uses (data); i++)
5400 use = iv_use (data, i);
5402 fprintf (dump_file, "Use %d:\n", i);
5403 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5404 for (j = 0; j < use->n_map_members; j++)
5406 if (!use->cost_map[j].cand
5407 || infinite_cost_p (use->cost_map[j].cost))
5408 continue;
5410 fprintf (dump_file, " %d\t%d\t%d\t",
5411 use->cost_map[j].cand->id,
5412 use->cost_map[j].cost.cost,
5413 use->cost_map[j].cost.complexity);
5414 if (use->cost_map[j].depends_on)
5415 bitmap_print (dump_file,
5416 use->cost_map[j].depends_on, "","");
5417 if (use->cost_map[j].inv_expr_id != -1)
5418 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5419 fprintf (dump_file, "\n");
5422 fprintf (dump_file, "\n");
5424 fprintf (dump_file, "\n");
5428 /* Determines cost of the candidate CAND. */
5430 static void
5431 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5433 comp_cost cost_base;
5434 unsigned cost, cost_step;
5435 tree base;
5437 if (!cand->iv)
5439 cand->cost = 0;
5440 return;
5443 /* There are two costs associated with the candidate -- its increment
5444 and its initialization. The second is almost negligible for any loop
5445 that rolls enough, so we take it just very little into account. */
5447 base = cand->iv->base;
5448 cost_base = force_var_cost (data, base, NULL);
5449 /* It will be exceptional that the iv register happens to be initialized with
5450 the proper value at no cost. In general, there will at least be a regcopy
5451 or a const set. */
5452 if (cost_base.cost == 0)
5453 cost_base.cost = COSTS_N_INSNS (1);
5454 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5456 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5458 /* Prefer the original ivs unless we may gain something by replacing it.
5459 The reason is to make debugging simpler; so this is not relevant for
5460 artificial ivs created by other optimization passes. */
5461 if (cand->pos != IP_ORIGINAL
5462 || !SSA_NAME_VAR (cand->var_before)
5463 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5464 cost++;
5466 /* Prefer not to insert statements into latch unless there are some
5467 already (so that we do not create unnecessary jumps). */
5468 if (cand->pos == IP_END
5469 && empty_block_p (ip_end_pos (data->current_loop)))
5470 cost++;
5472 cand->cost = cost;
5473 cand->cost_step = cost_step;
5476 /* Determines costs of computation of the candidates. */
5478 static void
5479 determine_iv_costs (struct ivopts_data *data)
5481 unsigned i;
5483 if (dump_file && (dump_flags & TDF_DETAILS))
5485 fprintf (dump_file, "Candidate costs:\n");
5486 fprintf (dump_file, " cand\tcost\n");
5489 for (i = 0; i < n_iv_cands (data); i++)
5491 struct iv_cand *cand = iv_cand (data, i);
5493 determine_iv_cost (data, cand);
5495 if (dump_file && (dump_flags & TDF_DETAILS))
5496 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5500 fprintf (dump_file, "\n");
5503 /* Calculates cost for having SIZE induction variables. */
5505 static unsigned
5506 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5508 /* We add size to the cost, so that we prefer eliminating ivs
5509 if possible. */
5510 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5511 data->body_includes_call);
5514 /* For each size of the induction variable set determine the penalty. */
5516 static void
5517 determine_set_costs (struct ivopts_data *data)
5519 unsigned j, n;
5520 gphi *phi;
5521 gphi_iterator psi;
5522 tree op;
5523 struct loop *loop = data->current_loop;
5524 bitmap_iterator bi;
5526 if (dump_file && (dump_flags & TDF_DETAILS))
5528 fprintf (dump_file, "Global costs:\n");
5529 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5530 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5531 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5532 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5535 n = 0;
5536 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5538 phi = psi.phi ();
5539 op = PHI_RESULT (phi);
5541 if (virtual_operand_p (op))
5542 continue;
5544 if (get_iv (data, op))
5545 continue;
5547 n++;
5550 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5552 struct version_info *info = ver_info (data, j);
5554 if (info->inv_id && info->has_nonlin_use)
5555 n++;
5558 data->regs_used = n;
5559 if (dump_file && (dump_flags & TDF_DETAILS))
5560 fprintf (dump_file, " regs_used %d\n", n);
5562 if (dump_file && (dump_flags & TDF_DETAILS))
5564 fprintf (dump_file, " cost for size:\n");
5565 fprintf (dump_file, " ivs\tcost\n");
5566 for (j = 0; j <= 2 * target_avail_regs; j++)
5567 fprintf (dump_file, " %d\t%d\n", j,
5568 ivopts_global_cost_for_size (data, j));
5569 fprintf (dump_file, "\n");
5573 /* Returns true if A is a cheaper cost pair than B. */
5575 static bool
5576 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5578 int cmp;
5580 if (!a)
5581 return false;
5583 if (!b)
5584 return true;
5586 cmp = compare_costs (a->cost, b->cost);
5587 if (cmp < 0)
5588 return true;
5590 if (cmp > 0)
5591 return false;
5593 /* In case the costs are the same, prefer the cheaper candidate. */
5594 if (a->cand->cost < b->cand->cost)
5595 return true;
5597 return false;
5601 /* Returns candidate by that USE is expressed in IVS. */
5603 static struct cost_pair *
5604 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5606 return ivs->cand_for_use[use->id];
5609 /* Computes the cost field of IVS structure. */
5611 static void
5612 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5614 comp_cost cost = ivs->cand_use_cost;
5616 cost.cost += ivs->cand_cost;
5618 cost.cost += ivopts_global_cost_for_size (data,
5619 ivs->n_regs + ivs->num_used_inv_expr);
5621 ivs->cost = cost;
5624 /* Remove invariants in set INVS to set IVS. */
5626 static void
5627 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5629 bitmap_iterator bi;
5630 unsigned iid;
5632 if (!invs)
5633 return;
5635 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5637 ivs->n_invariant_uses[iid]--;
5638 if (ivs->n_invariant_uses[iid] == 0)
5639 ivs->n_regs--;
5643 /* Set USE not to be expressed by any candidate in IVS. */
5645 static void
5646 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5647 struct iv_use *use)
5649 unsigned uid = use->id, cid;
5650 struct cost_pair *cp;
5652 cp = ivs->cand_for_use[uid];
5653 if (!cp)
5654 return;
5655 cid = cp->cand->id;
5657 ivs->bad_uses++;
5658 ivs->cand_for_use[uid] = NULL;
5659 ivs->n_cand_uses[cid]--;
5661 if (ivs->n_cand_uses[cid] == 0)
5663 bitmap_clear_bit (ivs->cands, cid);
5664 /* Do not count the pseudocandidates. */
5665 if (cp->cand->iv)
5666 ivs->n_regs--;
5667 ivs->n_cands--;
5668 ivs->cand_cost -= cp->cand->cost;
5670 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5673 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5675 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5677 if (cp->inv_expr_id != -1)
5679 ivs->used_inv_expr[cp->inv_expr_id]--;
5680 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5681 ivs->num_used_inv_expr--;
5683 iv_ca_recount_cost (data, ivs);
5686 /* Add invariants in set INVS to set IVS. */
5688 static void
5689 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5691 bitmap_iterator bi;
5692 unsigned iid;
5694 if (!invs)
5695 return;
5697 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5699 ivs->n_invariant_uses[iid]++;
5700 if (ivs->n_invariant_uses[iid] == 1)
5701 ivs->n_regs++;
5705 /* Set cost pair for USE in set IVS to CP. */
5707 static void
5708 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5709 struct iv_use *use, struct cost_pair *cp)
5711 unsigned uid = use->id, cid;
5713 if (ivs->cand_for_use[uid] == cp)
5714 return;
5716 if (ivs->cand_for_use[uid])
5717 iv_ca_set_no_cp (data, ivs, use);
5719 if (cp)
5721 cid = cp->cand->id;
5723 ivs->bad_uses--;
5724 ivs->cand_for_use[uid] = cp;
5725 ivs->n_cand_uses[cid]++;
5726 if (ivs->n_cand_uses[cid] == 1)
5728 bitmap_set_bit (ivs->cands, cid);
5729 /* Do not count the pseudocandidates. */
5730 if (cp->cand->iv)
5731 ivs->n_regs++;
5732 ivs->n_cands++;
5733 ivs->cand_cost += cp->cand->cost;
5735 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5738 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5739 iv_ca_set_add_invariants (ivs, cp->depends_on);
5741 if (cp->inv_expr_id != -1)
5743 ivs->used_inv_expr[cp->inv_expr_id]++;
5744 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5745 ivs->num_used_inv_expr++;
5747 iv_ca_recount_cost (data, ivs);
5751 /* Extend set IVS by expressing USE by some of the candidates in it
5752 if possible. Consider all important candidates if candidates in
5753 set IVS don't give any result. */
5755 static void
5756 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5757 struct iv_use *use)
5759 struct cost_pair *best_cp = NULL, *cp;
5760 bitmap_iterator bi;
5761 unsigned i;
5762 struct iv_cand *cand;
5764 gcc_assert (ivs->upto >= use->id);
5765 ivs->upto++;
5766 ivs->bad_uses++;
5768 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5770 cand = iv_cand (data, i);
5771 cp = get_use_iv_cost (data, use, cand);
5772 if (cheaper_cost_pair (cp, best_cp))
5773 best_cp = cp;
5776 if (best_cp == NULL)
5778 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5780 cand = iv_cand (data, i);
5781 cp = get_use_iv_cost (data, use, cand);
5782 if (cheaper_cost_pair (cp, best_cp))
5783 best_cp = cp;
5787 iv_ca_set_cp (data, ivs, use, best_cp);
5790 /* Get cost for assignment IVS. */
5792 static comp_cost
5793 iv_ca_cost (struct iv_ca *ivs)
5795 /* This was a conditional expression but it triggered a bug in
5796 Sun C 5.5. */
5797 if (ivs->bad_uses)
5798 return infinite_cost;
5799 else
5800 return ivs->cost;
5803 /* Returns true if all dependences of CP are among invariants in IVS. */
5805 static bool
5806 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5808 unsigned i;
5809 bitmap_iterator bi;
5811 if (!cp->depends_on)
5812 return true;
5814 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5816 if (ivs->n_invariant_uses[i] == 0)
5817 return false;
5820 return true;
5823 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5824 it before NEXT_CHANGE. */
5826 static struct iv_ca_delta *
5827 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5828 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5830 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5832 change->use = use;
5833 change->old_cp = old_cp;
5834 change->new_cp = new_cp;
5835 change->next_change = next_change;
5837 return change;
5840 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5841 are rewritten. */
5843 static struct iv_ca_delta *
5844 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5846 struct iv_ca_delta *last;
5848 if (!l2)
5849 return l1;
5851 if (!l1)
5852 return l2;
5854 for (last = l1; last->next_change; last = last->next_change)
5855 continue;
5856 last->next_change = l2;
5858 return l1;
5861 /* Reverse the list of changes DELTA, forming the inverse to it. */
5863 static struct iv_ca_delta *
5864 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5866 struct iv_ca_delta *act, *next, *prev = NULL;
5868 for (act = delta; act; act = next)
5870 next = act->next_change;
5871 act->next_change = prev;
5872 prev = act;
5874 std::swap (act->old_cp, act->new_cp);
5877 return prev;
5880 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5881 reverted instead. */
5883 static void
5884 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5885 struct iv_ca_delta *delta, bool forward)
5887 struct cost_pair *from, *to;
5888 struct iv_ca_delta *act;
5890 if (!forward)
5891 delta = iv_ca_delta_reverse (delta);
5893 for (act = delta; act; act = act->next_change)
5895 from = act->old_cp;
5896 to = act->new_cp;
5897 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5898 iv_ca_set_cp (data, ivs, act->use, to);
5901 if (!forward)
5902 iv_ca_delta_reverse (delta);
5905 /* Returns true if CAND is used in IVS. */
5907 static bool
5908 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5910 return ivs->n_cand_uses[cand->id] > 0;
5913 /* Returns number of induction variable candidates in the set IVS. */
5915 static unsigned
5916 iv_ca_n_cands (struct iv_ca *ivs)
5918 return ivs->n_cands;
5921 /* Free the list of changes DELTA. */
5923 static void
5924 iv_ca_delta_free (struct iv_ca_delta **delta)
5926 struct iv_ca_delta *act, *next;
5928 for (act = *delta; act; act = next)
5930 next = act->next_change;
5931 free (act);
5934 *delta = NULL;
5937 /* Allocates new iv candidates assignment. */
5939 static struct iv_ca *
5940 iv_ca_new (struct ivopts_data *data)
5942 struct iv_ca *nw = XNEW (struct iv_ca);
5944 nw->upto = 0;
5945 nw->bad_uses = 0;
5946 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5947 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5948 nw->cands = BITMAP_ALLOC (NULL);
5949 nw->n_cands = 0;
5950 nw->n_regs = 0;
5951 nw->cand_use_cost = no_cost;
5952 nw->cand_cost = 0;
5953 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5954 nw->cost = no_cost;
5955 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5956 nw->num_used_inv_expr = 0;
5958 return nw;
5961 /* Free memory occupied by the set IVS. */
5963 static void
5964 iv_ca_free (struct iv_ca **ivs)
5966 free ((*ivs)->cand_for_use);
5967 free ((*ivs)->n_cand_uses);
5968 BITMAP_FREE ((*ivs)->cands);
5969 free ((*ivs)->n_invariant_uses);
5970 free ((*ivs)->used_inv_expr);
5971 free (*ivs);
5972 *ivs = NULL;
5975 /* Dumps IVS to FILE. */
5977 static void
5978 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5980 const char *pref = " invariants ";
5981 unsigned i;
5982 comp_cost cost = iv_ca_cost (ivs);
5984 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5985 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5986 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5987 bitmap_print (file, ivs->cands, " candidates: ","\n");
5989 for (i = 0; i < ivs->upto; i++)
5991 struct iv_use *use = iv_use (data, i);
5992 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5993 if (cp)
5994 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5995 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5996 else
5997 fprintf (file, " use:%d --> ??\n", use->id);
6000 for (i = 1; i <= data->max_inv_id; i++)
6001 if (ivs->n_invariant_uses[i])
6003 fprintf (file, "%s%d", pref, i);
6004 pref = ", ";
6006 fprintf (file, "\n\n");
6009 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6010 new set, and store differences in DELTA. Number of induction variables
6011 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6012 the function will try to find a solution with mimimal iv candidates. */
6014 static comp_cost
6015 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6016 struct iv_cand *cand, struct iv_ca_delta **delta,
6017 unsigned *n_ivs, bool min_ncand)
6019 unsigned i;
6020 comp_cost cost;
6021 struct iv_use *use;
6022 struct cost_pair *old_cp, *new_cp;
6024 *delta = NULL;
6025 for (i = 0; i < ivs->upto; i++)
6027 use = iv_use (data, i);
6028 old_cp = iv_ca_cand_for_use (ivs, use);
6030 if (old_cp
6031 && old_cp->cand == cand)
6032 continue;
6034 new_cp = get_use_iv_cost (data, use, cand);
6035 if (!new_cp)
6036 continue;
6038 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6039 continue;
6041 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6042 continue;
6044 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6047 iv_ca_delta_commit (data, ivs, *delta, true);
6048 cost = iv_ca_cost (ivs);
6049 if (n_ivs)
6050 *n_ivs = iv_ca_n_cands (ivs);
6051 iv_ca_delta_commit (data, ivs, *delta, false);
6053 return cost;
6056 /* Try narrowing set IVS by removing CAND. Return the cost of
6057 the new set and store the differences in DELTA. START is
6058 the candidate with which we start narrowing. */
6060 static comp_cost
6061 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6062 struct iv_cand *cand, struct iv_cand *start,
6063 struct iv_ca_delta **delta)
6065 unsigned i, ci;
6066 struct iv_use *use;
6067 struct cost_pair *old_cp, *new_cp, *cp;
6068 bitmap_iterator bi;
6069 struct iv_cand *cnd;
6070 comp_cost cost, best_cost, acost;
6072 *delta = NULL;
6073 for (i = 0; i < n_iv_uses (data); i++)
6075 use = iv_use (data, i);
6077 old_cp = iv_ca_cand_for_use (ivs, use);
6078 if (old_cp->cand != cand)
6079 continue;
6081 best_cost = iv_ca_cost (ivs);
6082 /* Start narrowing with START. */
6083 new_cp = get_use_iv_cost (data, use, start);
6085 if (data->consider_all_candidates)
6087 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6089 if (ci == cand->id || (start && ci == start->id))
6090 continue;
6092 cnd = iv_cand (data, ci);
6094 cp = get_use_iv_cost (data, use, cnd);
6095 if (!cp)
6096 continue;
6098 iv_ca_set_cp (data, ivs, use, cp);
6099 acost = iv_ca_cost (ivs);
6101 if (compare_costs (acost, best_cost) < 0)
6103 best_cost = acost;
6104 new_cp = cp;
6108 else
6110 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6112 if (ci == cand->id || (start && ci == start->id))
6113 continue;
6115 cnd = iv_cand (data, ci);
6117 cp = get_use_iv_cost (data, use, cnd);
6118 if (!cp)
6119 continue;
6121 iv_ca_set_cp (data, ivs, use, cp);
6122 acost = iv_ca_cost (ivs);
6124 if (compare_costs (acost, best_cost) < 0)
6126 best_cost = acost;
6127 new_cp = cp;
6131 /* Restore to old cp for use. */
6132 iv_ca_set_cp (data, ivs, use, old_cp);
6134 if (!new_cp)
6136 iv_ca_delta_free (delta);
6137 return infinite_cost;
6140 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6143 iv_ca_delta_commit (data, ivs, *delta, true);
6144 cost = iv_ca_cost (ivs);
6145 iv_ca_delta_commit (data, ivs, *delta, false);
6147 return cost;
6150 /* Try optimizing the set of candidates IVS by removing candidates different
6151 from to EXCEPT_CAND from it. Return cost of the new set, and store
6152 differences in DELTA. */
6154 static comp_cost
6155 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6156 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6158 bitmap_iterator bi;
6159 struct iv_ca_delta *act_delta, *best_delta;
6160 unsigned i;
6161 comp_cost best_cost, acost;
6162 struct iv_cand *cand;
6164 best_delta = NULL;
6165 best_cost = iv_ca_cost (ivs);
6167 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6169 cand = iv_cand (data, i);
6171 if (cand == except_cand)
6172 continue;
6174 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6176 if (compare_costs (acost, best_cost) < 0)
6178 best_cost = acost;
6179 iv_ca_delta_free (&best_delta);
6180 best_delta = act_delta;
6182 else
6183 iv_ca_delta_free (&act_delta);
6186 if (!best_delta)
6188 *delta = NULL;
6189 return best_cost;
6192 /* Recurse to possibly remove other unnecessary ivs. */
6193 iv_ca_delta_commit (data, ivs, best_delta, true);
6194 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6195 iv_ca_delta_commit (data, ivs, best_delta, false);
6196 *delta = iv_ca_delta_join (best_delta, *delta);
6197 return best_cost;
6200 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6201 cheaper local cost for USE than BEST_CP. Return pointer to
6202 the corresponding cost_pair, otherwise just return BEST_CP. */
6204 static struct cost_pair*
6205 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6206 unsigned int cand_idx, struct iv_cand *old_cand,
6207 struct cost_pair *best_cp)
6209 struct iv_cand *cand;
6210 struct cost_pair *cp;
6212 gcc_assert (old_cand != NULL && best_cp != NULL);
6213 if (cand_idx == old_cand->id)
6214 return best_cp;
6216 cand = iv_cand (data, cand_idx);
6217 cp = get_use_iv_cost (data, use, cand);
6218 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6219 return cp;
6221 return best_cp;
6224 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6225 which are used by more than one iv uses. For each of those candidates,
6226 this function tries to represent iv uses under that candidate using
6227 other ones with lower local cost, then tries to prune the new set.
6228 If the new set has lower cost, It returns the new cost after recording
6229 candidate replacement in list DELTA. */
6231 static comp_cost
6232 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6233 struct iv_ca_delta **delta)
6235 bitmap_iterator bi, bj;
6236 unsigned int i, j, k;
6237 struct iv_use *use;
6238 struct iv_cand *cand;
6239 comp_cost orig_cost, acost;
6240 struct iv_ca_delta *act_delta, *tmp_delta;
6241 struct cost_pair *old_cp, *best_cp = NULL;
6243 *delta = NULL;
6244 orig_cost = iv_ca_cost (ivs);
6246 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6248 if (ivs->n_cand_uses[i] == 1
6249 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6250 continue;
6252 cand = iv_cand (data, i);
6254 act_delta = NULL;
6255 /* Represent uses under current candidate using other ones with
6256 lower local cost. */
6257 for (j = 0; j < ivs->upto; j++)
6259 use = iv_use (data, j);
6260 old_cp = iv_ca_cand_for_use (ivs, use);
6262 if (old_cp->cand != cand)
6263 continue;
6265 best_cp = old_cp;
6266 if (data->consider_all_candidates)
6267 for (k = 0; k < n_iv_cands (data); k++)
6268 best_cp = cheaper_cost_with_cand (data, use, k,
6269 old_cp->cand, best_cp);
6270 else
6271 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6272 best_cp = cheaper_cost_with_cand (data, use, k,
6273 old_cp->cand, best_cp);
6275 if (best_cp == old_cp)
6276 continue;
6278 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6280 /* No need for further prune. */
6281 if (!act_delta)
6282 continue;
6284 /* Prune the new candidate set. */
6285 iv_ca_delta_commit (data, ivs, act_delta, true);
6286 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6287 iv_ca_delta_commit (data, ivs, act_delta, false);
6288 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6290 if (compare_costs (acost, orig_cost) < 0)
6292 *delta = act_delta;
6293 return acost;
6295 else
6296 iv_ca_delta_free (&act_delta);
6299 return orig_cost;
6302 /* Tries to extend the sets IVS in the best possible way in order
6303 to express the USE. If ORIGINALP is true, prefer candidates from
6304 the original set of IVs, otherwise favor important candidates not
6305 based on any memory object. */
6307 static bool
6308 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6309 struct iv_use *use, bool originalp)
6311 comp_cost best_cost, act_cost;
6312 unsigned i;
6313 bitmap_iterator bi;
6314 struct iv_cand *cand;
6315 struct iv_ca_delta *best_delta = NULL, *act_delta;
6316 struct cost_pair *cp;
6318 iv_ca_add_use (data, ivs, use);
6319 best_cost = iv_ca_cost (ivs);
6320 cp = iv_ca_cand_for_use (ivs, use);
6321 if (cp)
6323 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6324 iv_ca_set_no_cp (data, ivs, use);
6327 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6328 first try important candidates not based on any memory object. Only if
6329 this fails, try the specific ones. Rationale -- in loops with many
6330 variables the best choice often is to use just one generic biv. If we
6331 added here many ivs specific to the uses, the optimization algorithm later
6332 would be likely to get stuck in a local minimum, thus causing us to create
6333 too many ivs. The approach from few ivs to more seems more likely to be
6334 successful -- starting from few ivs, replacing an expensive use by a
6335 specific iv should always be a win. */
6336 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6338 cand = iv_cand (data, i);
6340 if (originalp && cand->pos !=IP_ORIGINAL)
6341 continue;
6343 if (!originalp && cand->iv->base_object != NULL_TREE)
6344 continue;
6346 if (iv_ca_cand_used_p (ivs, cand))
6347 continue;
6349 cp = get_use_iv_cost (data, use, cand);
6350 if (!cp)
6351 continue;
6353 iv_ca_set_cp (data, ivs, use, cp);
6354 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6355 true);
6356 iv_ca_set_no_cp (data, ivs, use);
6357 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6359 if (compare_costs (act_cost, best_cost) < 0)
6361 best_cost = act_cost;
6363 iv_ca_delta_free (&best_delta);
6364 best_delta = act_delta;
6366 else
6367 iv_ca_delta_free (&act_delta);
6370 if (infinite_cost_p (best_cost))
6372 for (i = 0; i < use->n_map_members; i++)
6374 cp = use->cost_map + i;
6375 cand = cp->cand;
6376 if (!cand)
6377 continue;
6379 /* Already tried this. */
6380 if (cand->important)
6382 if (originalp && cand->pos == IP_ORIGINAL)
6383 continue;
6384 if (!originalp && cand->iv->base_object == NULL_TREE)
6385 continue;
6388 if (iv_ca_cand_used_p (ivs, cand))
6389 continue;
6391 act_delta = NULL;
6392 iv_ca_set_cp (data, ivs, use, cp);
6393 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6394 iv_ca_set_no_cp (data, ivs, use);
6395 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6396 cp, act_delta);
6398 if (compare_costs (act_cost, best_cost) < 0)
6400 best_cost = act_cost;
6402 if (best_delta)
6403 iv_ca_delta_free (&best_delta);
6404 best_delta = act_delta;
6406 else
6407 iv_ca_delta_free (&act_delta);
6411 iv_ca_delta_commit (data, ivs, best_delta, true);
6412 iv_ca_delta_free (&best_delta);
6414 return !infinite_cost_p (best_cost);
6417 /* Finds an initial assignment of candidates to uses. */
6419 static struct iv_ca *
6420 get_initial_solution (struct ivopts_data *data, bool originalp)
6422 struct iv_ca *ivs = iv_ca_new (data);
6423 unsigned i;
6425 for (i = 0; i < n_iv_uses (data); i++)
6426 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6428 iv_ca_free (&ivs);
6429 return NULL;
6432 return ivs;
6435 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6436 points to a bool variable, this function tries to break local
6437 optimal fixed-point by replacing candidates in IVS if it's true. */
6439 static bool
6440 try_improve_iv_set (struct ivopts_data *data,
6441 struct iv_ca *ivs, bool *try_replace_p)
6443 unsigned i, n_ivs;
6444 comp_cost acost, best_cost = iv_ca_cost (ivs);
6445 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6446 struct iv_cand *cand;
6448 /* Try extending the set of induction variables by one. */
6449 for (i = 0; i < n_iv_cands (data); i++)
6451 cand = iv_cand (data, i);
6453 if (iv_ca_cand_used_p (ivs, cand))
6454 continue;
6456 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6457 if (!act_delta)
6458 continue;
6460 /* If we successfully added the candidate and the set is small enough,
6461 try optimizing it by removing other candidates. */
6462 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6464 iv_ca_delta_commit (data, ivs, act_delta, true);
6465 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6466 iv_ca_delta_commit (data, ivs, act_delta, false);
6467 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6470 if (compare_costs (acost, best_cost) < 0)
6472 best_cost = acost;
6473 iv_ca_delta_free (&best_delta);
6474 best_delta = act_delta;
6476 else
6477 iv_ca_delta_free (&act_delta);
6480 if (!best_delta)
6482 /* Try removing the candidates from the set instead. */
6483 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6485 if (!best_delta && *try_replace_p)
6487 *try_replace_p = false;
6488 /* So far candidate selecting algorithm tends to choose fewer IVs
6489 so that it can handle cases in which loops have many variables
6490 but the best choice is often to use only one general biv. One
6491 weakness is it can't handle opposite cases, in which different
6492 candidates should be chosen with respect to each use. To solve
6493 the problem, we replace candidates in a manner described by the
6494 comments of iv_ca_replace, thus give general algorithm a chance
6495 to break local optimal fixed-point in these cases. */
6496 best_cost = iv_ca_replace (data, ivs, &best_delta);
6499 if (!best_delta)
6500 return false;
6503 iv_ca_delta_commit (data, ivs, best_delta, true);
6504 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6505 iv_ca_delta_free (&best_delta);
6506 return true;
6509 /* Attempts to find the optimal set of induction variables. We do simple
6510 greedy heuristic -- we try to replace at most one candidate in the selected
6511 solution and remove the unused ivs while this improves the cost. */
6513 static struct iv_ca *
6514 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6516 struct iv_ca *set;
6517 bool try_replace_p = true;
6519 /* Get the initial solution. */
6520 set = get_initial_solution (data, originalp);
6521 if (!set)
6523 if (dump_file && (dump_flags & TDF_DETAILS))
6524 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6525 return NULL;
6528 if (dump_file && (dump_flags & TDF_DETAILS))
6530 fprintf (dump_file, "Initial set of candidates:\n");
6531 iv_ca_dump (data, dump_file, set);
6534 while (try_improve_iv_set (data, set, &try_replace_p))
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6538 fprintf (dump_file, "Improved to:\n");
6539 iv_ca_dump (data, dump_file, set);
6543 return set;
6546 static struct iv_ca *
6547 find_optimal_iv_set (struct ivopts_data *data)
6549 unsigned i;
6550 struct iv_ca *set, *origset;
6551 struct iv_use *use;
6552 comp_cost cost, origcost;
6554 /* Determine the cost based on a strategy that starts with original IVs,
6555 and try again using a strategy that prefers candidates not based
6556 on any IVs. */
6557 origset = find_optimal_iv_set_1 (data, true);
6558 set = find_optimal_iv_set_1 (data, false);
6560 if (!origset && !set)
6561 return NULL;
6563 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6564 cost = set ? iv_ca_cost (set) : infinite_cost;
6566 if (dump_file && (dump_flags & TDF_DETAILS))
6568 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6569 origcost.cost, origcost.complexity);
6570 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6571 cost.cost, cost.complexity);
6574 /* Choose the one with the best cost. */
6575 if (compare_costs (origcost, cost) <= 0)
6577 if (set)
6578 iv_ca_free (&set);
6579 set = origset;
6581 else if (origset)
6582 iv_ca_free (&origset);
6584 for (i = 0; i < n_iv_uses (data); i++)
6586 use = iv_use (data, i);
6587 use->selected = iv_ca_cand_for_use (set, use)->cand;
6590 return set;
6593 /* Creates a new induction variable corresponding to CAND. */
6595 static void
6596 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6598 gimple_stmt_iterator incr_pos;
6599 tree base;
6600 bool after = false;
6602 if (!cand->iv)
6603 return;
6605 switch (cand->pos)
6607 case IP_NORMAL:
6608 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6609 break;
6611 case IP_END:
6612 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6613 after = true;
6614 break;
6616 case IP_AFTER_USE:
6617 after = true;
6618 /* fall through */
6619 case IP_BEFORE_USE:
6620 incr_pos = gsi_for_stmt (cand->incremented_at);
6621 break;
6623 case IP_ORIGINAL:
6624 /* Mark that the iv is preserved. */
6625 name_info (data, cand->var_before)->preserve_biv = true;
6626 name_info (data, cand->var_after)->preserve_biv = true;
6628 /* Rewrite the increment so that it uses var_before directly. */
6629 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6630 return;
6633 gimple_add_tmp_var (cand->var_before);
6635 base = unshare_expr (cand->iv->base);
6637 create_iv (base, unshare_expr (cand->iv->step),
6638 cand->var_before, data->current_loop,
6639 &incr_pos, after, &cand->var_before, &cand->var_after);
6642 /* Creates new induction variables described in SET. */
6644 static void
6645 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6647 unsigned i;
6648 struct iv_cand *cand;
6649 bitmap_iterator bi;
6651 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6653 cand = iv_cand (data, i);
6654 create_new_iv (data, cand);
6657 if (dump_file && (dump_flags & TDF_DETAILS))
6659 fprintf (dump_file, "Selected IV set for loop %d",
6660 data->current_loop->num);
6661 if (data->loop_loc != UNKNOWN_LOCATION)
6662 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6663 LOCATION_LINE (data->loop_loc));
6664 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6665 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6667 cand = iv_cand (data, i);
6668 dump_cand (dump_file, cand);
6670 fprintf (dump_file, "\n");
6674 /* Rewrites USE (definition of iv used in a nonlinear expression)
6675 using candidate CAND. */
6677 static void
6678 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6679 struct iv_use *use, struct iv_cand *cand)
6681 tree comp;
6682 tree op, tgt;
6683 gassign *ass;
6684 gimple_stmt_iterator bsi;
6686 /* An important special case -- if we are asked to express value of
6687 the original iv by itself, just exit; there is no need to
6688 introduce a new computation (that might also need casting the
6689 variable to unsigned and back). */
6690 if (cand->pos == IP_ORIGINAL
6691 && cand->incremented_at == use->stmt)
6693 enum tree_code stmt_code;
6695 gcc_assert (is_gimple_assign (use->stmt));
6696 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6698 /* Check whether we may leave the computation unchanged.
6699 This is the case only if it does not rely on other
6700 computations in the loop -- otherwise, the computation
6701 we rely upon may be removed in remove_unused_ivs,
6702 thus leading to ICE. */
6703 stmt_code = gimple_assign_rhs_code (use->stmt);
6704 if (stmt_code == PLUS_EXPR
6705 || stmt_code == MINUS_EXPR
6706 || stmt_code == POINTER_PLUS_EXPR)
6708 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6709 op = gimple_assign_rhs2 (use->stmt);
6710 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6711 op = gimple_assign_rhs1 (use->stmt);
6712 else
6713 op = NULL_TREE;
6715 else
6716 op = NULL_TREE;
6718 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6719 return;
6722 comp = get_computation (data->current_loop, use, cand);
6723 gcc_assert (comp != NULL_TREE);
6725 switch (gimple_code (use->stmt))
6727 case GIMPLE_PHI:
6728 tgt = PHI_RESULT (use->stmt);
6730 /* If we should keep the biv, do not replace it. */
6731 if (name_info (data, tgt)->preserve_biv)
6732 return;
6734 bsi = gsi_after_labels (gimple_bb (use->stmt));
6735 break;
6737 case GIMPLE_ASSIGN:
6738 tgt = gimple_assign_lhs (use->stmt);
6739 bsi = gsi_for_stmt (use->stmt);
6740 break;
6742 default:
6743 gcc_unreachable ();
6746 if (!valid_gimple_rhs_p (comp)
6747 || (gimple_code (use->stmt) != GIMPLE_PHI
6748 /* We can't allow re-allocating the stmt as it might be pointed
6749 to still. */
6750 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6751 >= gimple_num_ops (gsi_stmt (bsi)))))
6753 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6754 true, GSI_SAME_STMT);
6755 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6757 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6758 /* As this isn't a plain copy we have to reset alignment
6759 information. */
6760 if (SSA_NAME_PTR_INFO (comp))
6761 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6765 if (gimple_code (use->stmt) == GIMPLE_PHI)
6767 ass = gimple_build_assign (tgt, comp);
6768 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6770 bsi = gsi_for_stmt (use->stmt);
6771 remove_phi_node (&bsi, false);
6773 else
6775 gimple_assign_set_rhs_from_tree (&bsi, comp);
6776 use->stmt = gsi_stmt (bsi);
6780 /* Performs a peephole optimization to reorder the iv update statement with
6781 a mem ref to enable instruction combining in later phases. The mem ref uses
6782 the iv value before the update, so the reordering transformation requires
6783 adjustment of the offset. CAND is the selected IV_CAND.
6785 Example:
6787 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6788 iv2 = iv1 + 1;
6790 if (t < val) (1)
6791 goto L;
6792 goto Head;
6795 directly propagating t over to (1) will introduce overlapping live range
6796 thus increase register pressure. This peephole transform it into:
6799 iv2 = iv1 + 1;
6800 t = MEM_REF (base, iv2, 8, 8);
6801 if (t < val)
6802 goto L;
6803 goto Head;
6806 static void
6807 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6809 tree var_after;
6810 gimple iv_update, stmt;
6811 basic_block bb;
6812 gimple_stmt_iterator gsi, gsi_iv;
6814 if (cand->pos != IP_NORMAL)
6815 return;
6817 var_after = cand->var_after;
6818 iv_update = SSA_NAME_DEF_STMT (var_after);
6820 bb = gimple_bb (iv_update);
6821 gsi = gsi_last_nondebug_bb (bb);
6822 stmt = gsi_stmt (gsi);
6824 /* Only handle conditional statement for now. */
6825 if (gimple_code (stmt) != GIMPLE_COND)
6826 return;
6828 gsi_prev_nondebug (&gsi);
6829 stmt = gsi_stmt (gsi);
6830 if (stmt != iv_update)
6831 return;
6833 gsi_prev_nondebug (&gsi);
6834 if (gsi_end_p (gsi))
6835 return;
6837 stmt = gsi_stmt (gsi);
6838 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6839 return;
6841 if (stmt != use->stmt)
6842 return;
6844 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6845 return;
6847 if (dump_file && (dump_flags & TDF_DETAILS))
6849 fprintf (dump_file, "Reordering \n");
6850 print_gimple_stmt (dump_file, iv_update, 0, 0);
6851 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6852 fprintf (dump_file, "\n");
6855 gsi = gsi_for_stmt (use->stmt);
6856 gsi_iv = gsi_for_stmt (iv_update);
6857 gsi_move_before (&gsi_iv, &gsi);
6859 cand->pos = IP_BEFORE_USE;
6860 cand->incremented_at = use->stmt;
6863 /* Rewrites USE (address that is an iv) using candidate CAND. */
6865 static void
6866 rewrite_use_address_1 (struct ivopts_data *data,
6867 struct iv_use *use, struct iv_cand *cand)
6869 aff_tree aff;
6870 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6871 tree base_hint = NULL_TREE;
6872 tree ref, iv;
6873 bool ok;
6875 adjust_iv_update_pos (cand, use);
6876 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6877 gcc_assert (ok);
6878 unshare_aff_combination (&aff);
6880 /* To avoid undefined overflow problems, all IV candidates use unsigned
6881 integer types. The drawback is that this makes it impossible for
6882 create_mem_ref to distinguish an IV that is based on a memory object
6883 from one that represents simply an offset.
6885 To work around this problem, we pass a hint to create_mem_ref that
6886 indicates which variable (if any) in aff is an IV based on a memory
6887 object. Note that we only consider the candidate. If this is not
6888 based on an object, the base of the reference is in some subexpression
6889 of the use -- but these will use pointer types, so they are recognized
6890 by the create_mem_ref heuristics anyway. */
6891 if (cand->iv->base_object)
6892 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6894 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6895 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6896 reference_alias_ptr_type (*use->op_p),
6897 iv, base_hint, data->speed);
6898 copy_ref_info (ref, *use->op_p);
6899 *use->op_p = ref;
6902 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6903 first use of a group, rewrites sub uses in the group too. */
6905 static void
6906 rewrite_use_address (struct ivopts_data *data,
6907 struct iv_use *use, struct iv_cand *cand)
6909 struct iv_use *next;
6911 gcc_assert (use->sub_id == 0);
6912 rewrite_use_address_1 (data, use, cand);
6913 update_stmt (use->stmt);
6915 for (next = use->next; next != NULL; next = next->next)
6917 rewrite_use_address_1 (data, next, cand);
6918 update_stmt (next->stmt);
6921 return;
6924 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6925 candidate CAND. */
6927 static void
6928 rewrite_use_compare (struct ivopts_data *data,
6929 struct iv_use *use, struct iv_cand *cand)
6931 tree comp, *var_p, op, bound;
6932 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6933 enum tree_code compare;
6934 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6935 bool ok;
6937 bound = cp->value;
6938 if (bound)
6940 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6941 tree var_type = TREE_TYPE (var);
6942 gimple_seq stmts;
6944 if (dump_file && (dump_flags & TDF_DETAILS))
6946 fprintf (dump_file, "Replacing exit test: ");
6947 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6949 compare = cp->comp;
6950 bound = unshare_expr (fold_convert (var_type, bound));
6951 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6952 if (stmts)
6953 gsi_insert_seq_on_edge_immediate (
6954 loop_preheader_edge (data->current_loop),
6955 stmts);
6957 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6958 gimple_cond_set_lhs (cond_stmt, var);
6959 gimple_cond_set_code (cond_stmt, compare);
6960 gimple_cond_set_rhs (cond_stmt, op);
6961 return;
6964 /* The induction variable elimination failed; just express the original
6965 giv. */
6966 comp = get_computation (data->current_loop, use, cand);
6967 gcc_assert (comp != NULL_TREE);
6969 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6970 gcc_assert (ok);
6972 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6973 true, GSI_SAME_STMT);
6976 /* Rewrites USE using candidate CAND. */
6978 static void
6979 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6981 switch (use->type)
6983 case USE_NONLINEAR_EXPR:
6984 rewrite_use_nonlinear_expr (data, use, cand);
6985 break;
6987 case USE_ADDRESS:
6988 rewrite_use_address (data, use, cand);
6989 break;
6991 case USE_COMPARE:
6992 rewrite_use_compare (data, use, cand);
6993 break;
6995 default:
6996 gcc_unreachable ();
6999 update_stmt (use->stmt);
7002 /* Rewrite the uses using the selected induction variables. */
7004 static void
7005 rewrite_uses (struct ivopts_data *data)
7007 unsigned i;
7008 struct iv_cand *cand;
7009 struct iv_use *use;
7011 for (i = 0; i < n_iv_uses (data); i++)
7013 use = iv_use (data, i);
7014 cand = use->selected;
7015 gcc_assert (cand);
7017 rewrite_use (data, use, cand);
7021 /* Removes the ivs that are not used after rewriting. */
7023 static void
7024 remove_unused_ivs (struct ivopts_data *data)
7026 unsigned j;
7027 bitmap_iterator bi;
7028 bitmap toremove = BITMAP_ALLOC (NULL);
7030 /* Figure out an order in which to release SSA DEFs so that we don't
7031 release something that we'd have to propagate into a debug stmt
7032 afterwards. */
7033 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7035 struct version_info *info;
7037 info = ver_info (data, j);
7038 if (info->iv
7039 && !integer_zerop (info->iv->step)
7040 && !info->inv_id
7041 && !info->iv->have_use_for
7042 && !info->preserve_biv)
7044 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7046 tree def = info->iv->ssa_name;
7048 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7050 imm_use_iterator imm_iter;
7051 use_operand_p use_p;
7052 gimple stmt;
7053 int count = 0;
7055 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7057 if (!gimple_debug_bind_p (stmt))
7058 continue;
7060 /* We just want to determine whether to do nothing
7061 (count == 0), to substitute the computed
7062 expression into a single use of the SSA DEF by
7063 itself (count == 1), or to use a debug temp
7064 because the SSA DEF is used multiple times or as
7065 part of a larger expression (count > 1). */
7066 count++;
7067 if (gimple_debug_bind_get_value (stmt) != def)
7068 count++;
7070 if (count > 1)
7071 BREAK_FROM_IMM_USE_STMT (imm_iter);
7074 if (!count)
7075 continue;
7077 struct iv_use dummy_use;
7078 struct iv_cand *best_cand = NULL, *cand;
7079 unsigned i, best_pref = 0, cand_pref;
7081 memset (&dummy_use, 0, sizeof (dummy_use));
7082 dummy_use.iv = info->iv;
7083 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7085 cand = iv_use (data, i)->selected;
7086 if (cand == best_cand)
7087 continue;
7088 cand_pref = operand_equal_p (cand->iv->step,
7089 info->iv->step, 0)
7090 ? 4 : 0;
7091 cand_pref
7092 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7093 == TYPE_MODE (TREE_TYPE (info->iv->base))
7094 ? 2 : 0;
7095 cand_pref
7096 += TREE_CODE (cand->iv->base) == INTEGER_CST
7097 ? 1 : 0;
7098 if (best_cand == NULL || best_pref < cand_pref)
7100 best_cand = cand;
7101 best_pref = cand_pref;
7105 if (!best_cand)
7106 continue;
7108 tree comp = get_computation_at (data->current_loop,
7109 &dummy_use, best_cand,
7110 SSA_NAME_DEF_STMT (def));
7111 if (!comp)
7112 continue;
7114 if (count > 1)
7116 tree vexpr = make_node (DEBUG_EXPR_DECL);
7117 DECL_ARTIFICIAL (vexpr) = 1;
7118 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7119 if (SSA_NAME_VAR (def))
7120 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7121 else
7122 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7123 gdebug *def_temp
7124 = gimple_build_debug_bind (vexpr, comp, NULL);
7125 gimple_stmt_iterator gsi;
7127 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7128 gsi = gsi_after_labels (gimple_bb
7129 (SSA_NAME_DEF_STMT (def)));
7130 else
7131 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7133 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7134 comp = vexpr;
7137 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7139 if (!gimple_debug_bind_p (stmt))
7140 continue;
7142 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7143 SET_USE (use_p, comp);
7145 update_stmt (stmt);
7151 release_defs_bitset (toremove);
7153 BITMAP_FREE (toremove);
7156 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7157 for hash_map::traverse. */
7159 bool
7160 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7162 free (value);
7163 return true;
7166 /* Frees data allocated by the optimization of a single loop. */
7168 static void
7169 free_loop_data (struct ivopts_data *data)
7171 unsigned i, j;
7172 bitmap_iterator bi;
7173 tree obj;
7175 if (data->niters)
7177 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7178 delete data->niters;
7179 data->niters = NULL;
7182 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7184 struct version_info *info;
7186 info = ver_info (data, i);
7187 free (info->iv);
7188 info->iv = NULL;
7189 info->has_nonlin_use = false;
7190 info->preserve_biv = false;
7191 info->inv_id = 0;
7193 bitmap_clear (data->relevant);
7194 bitmap_clear (data->important_candidates);
7196 for (i = 0; i < n_iv_uses (data); i++)
7198 struct iv_use *use = iv_use (data, i);
7199 struct iv_use *pre = use, *sub = use->next;
7201 while (sub)
7203 gcc_assert (sub->related_cands == NULL);
7204 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7206 free (sub->iv);
7207 pre = sub;
7208 sub = sub->next;
7209 free (pre);
7212 free (use->iv);
7213 BITMAP_FREE (use->related_cands);
7214 for (j = 0; j < use->n_map_members; j++)
7215 if (use->cost_map[j].depends_on)
7216 BITMAP_FREE (use->cost_map[j].depends_on);
7217 free (use->cost_map);
7218 free (use);
7220 data->iv_uses.truncate (0);
7222 for (i = 0; i < n_iv_cands (data); i++)
7224 struct iv_cand *cand = iv_cand (data, i);
7226 free (cand->iv);
7227 if (cand->depends_on)
7228 BITMAP_FREE (cand->depends_on);
7229 free (cand);
7231 data->iv_candidates.truncate (0);
7233 if (data->version_info_size < num_ssa_names)
7235 data->version_info_size = 2 * num_ssa_names;
7236 free (data->version_info);
7237 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7240 data->max_inv_id = 0;
7242 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7243 SET_DECL_RTL (obj, NULL_RTX);
7245 decl_rtl_to_reset.truncate (0);
7247 data->inv_expr_tab->empty ();
7248 data->inv_expr_id = 0;
7251 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7252 loop tree. */
7254 static void
7255 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7257 free_loop_data (data);
7258 free (data->version_info);
7259 BITMAP_FREE (data->relevant);
7260 BITMAP_FREE (data->important_candidates);
7262 decl_rtl_to_reset.release ();
7263 data->iv_uses.release ();
7264 data->iv_candidates.release ();
7265 delete data->inv_expr_tab;
7266 data->inv_expr_tab = NULL;
7267 free_affine_expand_cache (&data->name_expansion_cache);
7270 /* Returns true if the loop body BODY includes any function calls. */
7272 static bool
7273 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7275 gimple_stmt_iterator gsi;
7276 unsigned i;
7278 for (i = 0; i < num_nodes; i++)
7279 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7281 gimple stmt = gsi_stmt (gsi);
7282 if (is_gimple_call (stmt)
7283 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7284 return true;
7286 return false;
7289 /* Optimizes the LOOP. Returns true if anything changed. */
7291 static bool
7292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7294 bool changed = false;
7295 struct iv_ca *iv_ca;
7296 edge exit = single_dom_exit (loop);
7297 basic_block *body;
7299 gcc_assert (!data->niters);
7300 data->current_loop = loop;
7301 data->loop_loc = find_loop_location (loop);
7302 data->speed = optimize_loop_for_speed_p (loop);
7304 if (dump_file && (dump_flags & TDF_DETAILS))
7306 fprintf (dump_file, "Processing loop %d", loop->num);
7307 if (data->loop_loc != UNKNOWN_LOCATION)
7308 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7309 LOCATION_LINE (data->loop_loc));
7310 fprintf (dump_file, "\n");
7312 if (exit)
7314 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7315 exit->src->index, exit->dest->index);
7316 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7317 fprintf (dump_file, "\n");
7320 fprintf (dump_file, "\n");
7323 body = get_loop_body (loop);
7324 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7325 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7326 free (body);
7328 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7330 /* For each ssa name determines whether it behaves as an induction variable
7331 in some loop. */
7332 if (!find_induction_variables (data))
7333 goto finish;
7335 /* Finds interesting uses (item 1). */
7336 find_interesting_uses (data);
7337 group_address_uses (data);
7338 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7339 goto finish;
7341 /* Finds candidates for the induction variables (item 2). */
7342 find_iv_candidates (data);
7344 /* Calculates the costs (item 3, part 1). */
7345 determine_iv_costs (data);
7346 determine_use_iv_costs (data);
7347 determine_set_costs (data);
7349 /* Find the optimal set of induction variables (item 3, part 2). */
7350 iv_ca = find_optimal_iv_set (data);
7351 if (!iv_ca)
7352 goto finish;
7353 changed = true;
7355 /* Create the new induction variables (item 4, part 1). */
7356 create_new_ivs (data, iv_ca);
7357 iv_ca_free (&iv_ca);
7359 /* Rewrite the uses (item 4, part 2). */
7360 rewrite_uses (data);
7362 /* Remove the ivs that are unused after rewriting. */
7363 remove_unused_ivs (data);
7365 /* We have changed the structure of induction variables; it might happen
7366 that definitions in the scev database refer to some of them that were
7367 eliminated. */
7368 scev_reset ();
7370 finish:
7371 free_loop_data (data);
7373 return changed;
7376 /* Main entry point. Optimizes induction variables in loops. */
7378 void
7379 tree_ssa_iv_optimize (void)
7381 struct loop *loop;
7382 struct ivopts_data data;
7384 tree_ssa_iv_optimize_init (&data);
7386 /* Optimize the loops starting with the innermost ones. */
7387 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7389 if (dump_file && (dump_flags & TDF_DETAILS))
7390 flow_loop_dump (loop, dump_file, NULL, 1);
7392 tree_ssa_iv_optimize_loop (&data, loop);
7395 tree_ssa_iv_optimize_finalize (&data);