/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  A small illustrative example of what the pass
   does is sketched in the comment below.  */
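/* Illustrative sketch (not part of the original sources): on a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address use `&a[i]' would typically be rewritten in terms of a new
   pointer induction variable, roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the classic strength reduction of the multiplication hidden in
   the array indexing.  The exact output depends on the target costs.  */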
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use the iv is attached to,
			   if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
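/* For example (illustrative, not from the original sources), given

     for (i = 10; i < n; i += 2)
       sum += i;

   the iv describing `i' would have base 10, step 2, biv_p set (it is
   incremented directly by the loop) and base_object NULL, since it is
   not a pointer.  */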
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
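/* A hedged illustration (not from the original sources): two candidate
   ways of computing a use might be

     comp_cost a = {4, 1};    -- say, reg + reg addressing
     comp_cost b = {4, 2};    -- say, reg + reg * scale addressing

   With equal runtime cost, the lower complexity acts as a tie-breaker,
   so `a' would be preferred.  */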
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
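/* Hedged examples (not from the original sources): for an array `a',
   determine_base_object (&a[i]) reduces through the ADDR_EXPR case to
   `&a', while determine_base_object ((char *) p + 4) looks through the
   conversion and the POINTER_PLUS_EXPR and returns `p' itself (converted
   to ptr_type_node by the default case).  A constant pointer such as
   (void *) 0 yields NULL_TREE.  */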
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
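/* For instance (illustrative): `&a' has a DECL_P operand, so the function
   returns false, whereas `&a[0]' wraps an ARRAY_REF and returns true;
   an expression like `&a[0] + 4' is also true because one of its
   summands contains a complex address.  */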
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
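/* A consequence of the lowering above (illustrative example): the bases
   `&a[0]' and `&a' are both rewritten into the same affine form, so only
   one candidate is created for them instead of two duplicates, and the
   cost of an address like `&a[i + 1]' is computed from its lowered
   base-plus-offset form.  */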
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var referred to by the step, but
	 since that is expensive and unlikely to matter in practice, we just
	 stop at the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var referred to by the step, but
     since that is expensive and unlikely to matter in practice, we just
     stop at the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
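/* For a loop guard like `if (i_1 < n_2)' (illustrative example), where
   `i_1' is an iv with nonzero step and `n_2' is loop invariant,
   extract_cond_operands would point *CONTROL_VAR at `i_1', *BOUND at
   `n_2', and return true; for `if (a_3 < b_4)' with neither side an iv
   it would return false.  */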
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
/* Record that BIV, together with its predecessor and successor ivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
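/* Worked examples (illustrative, not from the original sources):
   constant_multiple_of (n * 6, n, &m) recurses on the MULT_EXPR and sets
   m = 6; constant_multiple_of (n * 6 - n, n, &m) handles the MINUS_EXPR
   as 6 - 1 and sets m = 5; constant_multiple_of (12, 4, &m) divides the
   constants and sets m = 3, while constant_multiple_of (13, 4, &m)
   returns false because the division leaves a remainder.  */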
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
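/* For instance (illustrative): a 32-bit-aligned int reference advanced by
   a step of 2 bytes may become misaligned after one iteration -- tree_ctz
   of the step is 1, and 2 * BITS_PER_UNIT is below the 32-bit alignment
   requirement -- so the function returns true, and on a strict-alignment
   target the address use is rejected.  */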
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
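/* To illustrate the grouping (example not from the original sources):
   in a loop accessing both a[i] and a[i + 4], the two addresses share
   the same base object, stripped base and step, and differ only in the
   constant offset (0 versus 4 elements), so the second access is
   recorded as a sub use of the first and the pair can later be
   addressed from one iv with different displacements.  */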
2071 /* Finds addresses in *OP_P inside STMT. */
2073 static void
2074 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2075 tree *op_p)
2077 tree base = *op_p, step = size_zero_node;
2078 struct iv *civ;
2079 struct ifs_ivopts_data ifs_ivopts_data;
2081 /* Do not play with volatile memory references. A bit too conservative,
2082 perhaps, but safe. */
2083 if (gimple_has_volatile_ops (stmt))
2084 goto fail;
2086 /* Ignore bitfields for now. Not really something terribly complicated
2087 to handle. TODO. */
2088 if (TREE_CODE (base) == BIT_FIELD_REF)
2089 goto fail;
2091 base = unshare_expr (base);
2093 if (TREE_CODE (base) == TARGET_MEM_REF)
2095 tree type = build_pointer_type (TREE_TYPE (base));
2096 tree astep;
2098 if (TMR_BASE (base)
2099 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2101 civ = get_iv (data, TMR_BASE (base));
2102 if (!civ)
2103 goto fail;
2105 TMR_BASE (base) = civ->base;
2106 step = civ->step;
2108 if (TMR_INDEX2 (base)
2109 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2111 civ = get_iv (data, TMR_INDEX2 (base));
2112 if (!civ)
2113 goto fail;
2115 TMR_INDEX2 (base) = civ->base;
2116 step = civ->step;
2118 if (TMR_INDEX (base)
2119 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2121 civ = get_iv (data, TMR_INDEX (base));
2122 if (!civ)
2123 goto fail;
2125 TMR_INDEX (base) = civ->base;
2126 astep = civ->step;
2128 if (astep)
2130 if (TMR_STEP (base))
2131 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2133 step = fold_build2 (PLUS_EXPR, type, step, astep);
2137 if (integer_zerop (step))
2138 goto fail;
2139 base = tree_mem_ref_addr (type, base);
2141 else
2143 ifs_ivopts_data.ivopts_data = data;
2144 ifs_ivopts_data.stmt = stmt;
2145 ifs_ivopts_data.step = size_zero_node;
2146 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2147 || integer_zerop (ifs_ivopts_data.step))
2148 goto fail;
2149 step = ifs_ivopts_data.step;
2151 /* Check that the base expression is addressable. This needs
2152 to be done after substituting bases of IVs into it. */
2153 if (may_be_nonaddressable_p (base))
2154 goto fail;
2156 /* Moreover, on strict alignment platforms, check that it is
2157 sufficiently aligned. */
2158 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2159 goto fail;
2161 base = build_fold_addr_expr (base);
2163 /* Substituting bases of IVs into the base expression might
2164 have caused folding opportunities. */
2165 if (TREE_CODE (base) == ADDR_EXPR)
2167 tree *ref = &TREE_OPERAND (base, 0);
2168 while (handled_component_p (*ref))
2169 ref = &TREE_OPERAND (*ref, 0);
2170 if (TREE_CODE (*ref) == MEM_REF)
2172 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2173 TREE_OPERAND (*ref, 0),
2174 TREE_OPERAND (*ref, 1));
2175 if (tem)
2176 *ref = tem;
2181 civ = alloc_iv (data, base, step);
2182 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2183 return;
2185 fail:
2186 for_each_index (op_p, idx_record_use, data);
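/* Editor's note, a hypothetical example of the path above: for a use
   MEM[symbol: a, index: i_1, step: 4] where i_1 is an iv with step 1,
   the accumulated step is 1 * 4 == 4 and the recorded address iv is
   roughly &a + 4 * i.  If the combined step folds to zero, the address
   is loop invariant and we take the fail path, recording the indices
   as ordinary uses instead.  */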
2189 /* Finds and records invariants used in STMT. */
2191 static void
2192 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2194 ssa_op_iter iter;
2195 use_operand_p use_p;
2196 tree op;
2198 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2200 op = USE_FROM_PTR (use_p);
2201 record_invariant (data, op, false);
2205 /* Finds interesting uses of induction variables in the statement STMT. */
2207 static void
2208 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2210 struct iv *iv;
2211 tree op, *lhs, *rhs;
2212 ssa_op_iter iter;
2213 use_operand_p use_p;
2214 enum tree_code code;
2216 find_invariants_stmt (data, stmt);
2218 if (gimple_code (stmt) == GIMPLE_COND)
2220 find_interesting_uses_cond (data, stmt);
2221 return;
2224 if (is_gimple_assign (stmt))
2226 lhs = gimple_assign_lhs_ptr (stmt);
2227 rhs = gimple_assign_rhs1_ptr (stmt);
2229 if (TREE_CODE (*lhs) == SSA_NAME)
2231 /* If the statement defines an induction variable, the uses are not
2232 interesting by themselves. */
2234 iv = get_iv (data, *lhs);
2236 if (iv && !integer_zerop (iv->step))
2237 return;
2240 code = gimple_assign_rhs_code (stmt);
2241 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2242 && (REFERENCE_CLASS_P (*rhs)
2243 || is_gimple_val (*rhs)))
2245 if (REFERENCE_CLASS_P (*rhs))
2246 find_interesting_uses_address (data, stmt, rhs);
2247 else
2248 find_interesting_uses_op (data, *rhs);
2250 if (REFERENCE_CLASS_P (*lhs))
2251 find_interesting_uses_address (data, stmt, lhs);
2252 return;
2254 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2256 find_interesting_uses_cond (data, stmt);
2257 return;
2260 /* TODO -- we should also handle address uses of type
2262 memory = call (whatever);
2264 and
2266 call (memory). */
2269 if (gimple_code (stmt) == GIMPLE_PHI
2270 && gimple_bb (stmt) == data->current_loop->header)
2272 iv = get_iv (data, PHI_RESULT (stmt));
2274 if (iv && !integer_zerop (iv->step))
2275 return;
2278 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2280 op = USE_FROM_PTR (use_p);
2282 if (TREE_CODE (op) != SSA_NAME)
2283 continue;
2285 iv = get_iv (data, op);
2286 if (!iv)
2287 continue;
2289 find_interesting_uses_op (data, op);
2293 /* Finds interesting uses of induction variables outside of loops
2294 on loop exit edge EXIT. */
2296 static void
2297 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2299 gphi *phi;
2300 gphi_iterator psi;
2301 tree def;
2303 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2305 phi = psi.phi ();
2306 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2307 if (!virtual_operand_p (def))
2308 find_interesting_uses_op (data, def);
2312 /* Finds uses of the induction variables that are interesting. */
2314 static void
2315 find_interesting_uses (struct ivopts_data *data)
2317 basic_block bb;
2318 gimple_stmt_iterator bsi;
2319 basic_block *body = get_loop_body (data->current_loop);
2320 unsigned i;
2321 struct version_info *info;
2322 edge e;
2324 if (dump_file && (dump_flags & TDF_DETAILS))
2325 fprintf (dump_file, "Uses:\n\n");
2327 for (i = 0; i < data->current_loop->num_nodes; i++)
2329 edge_iterator ei;
2330 bb = body[i];
2332 FOR_EACH_EDGE (e, ei, bb->succs)
2333 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2334 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2335 find_interesting_uses_outside (data, e);
2337 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2339 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2340 if (!is_gimple_debug (gsi_stmt (bsi)))
2341 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2344 if (dump_file && (dump_flags & TDF_DETAILS))
2346 bitmap_iterator bi;
2348 fprintf (dump_file, "\n");
2350 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2352 info = ver_info (data, i);
2353 if (info->inv_id)
2355 fprintf (dump_file, " ");
2356 print_generic_expr (dump_file, info->name, TDF_SLIM);
2357 fprintf (dump_file, " is invariant (%d)%s\n",
2358 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2362 fprintf (dump_file, "\n");
2365 free (body);
2368 /* Compute the maximum offset of the [base + offset] addressing mode
2369 for the memory reference represented by USE. */
2371 static HOST_WIDE_INT
2372 compute_max_addr_offset (struct iv_use *use)
2374 int width;
2375 rtx reg, addr;
2376 HOST_WIDE_INT i, off;
2377 unsigned list_index, num;
2378 addr_space_t as;
2379 machine_mode mem_mode, addr_mode;
2380 static vec<HOST_WIDE_INT> max_offset_list;
2382 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2383 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2385 num = max_offset_list.length ();
2386 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2387 if (list_index >= num)
2389 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2390 for (; num < max_offset_list.length (); num++)
2391 max_offset_list[num] = -1;
2394 off = max_offset_list[list_index];
2395 if (off != -1)
2396 return off;
2398 addr_mode = targetm.addr_space.address_mode (as);
2399 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2400 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2402 width = GET_MODE_BITSIZE (addr_mode) - 1;
2403 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2404 width = HOST_BITS_PER_WIDE_INT - 1;
2406 for (i = width; i > 0; i--)
2408 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2409 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2410 if (memory_address_addr_space_p (mem_mode, addr, as))
2411 break;
2413 /* For some strict-alignment targets, the offset must be naturally
2414 aligned. Try an aligned offset if mem_mode is not QImode. */
2415 off = ((unsigned HOST_WIDE_INT) 1 << i);
2416 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2418 off -= GET_MODE_SIZE (mem_mode);
2419 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2420 if (memory_address_addr_space_p (mem_mode, addr, as))
2421 break;
2424 if (i == 0)
2425 off = 0;
2427 max_offset_list[list_index] = off;
2428 return off;
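/* Illustrative sketch (an assumed target, not from the sources): if the
   [base + offset] form accepts 16-bit signed offsets, the descending
   probe above fails until i == 15, where off = (1 << 15) - 1 == 32767
   is accepted; 32767 is then cached in max_offset_list for this
   (address space, mode) pair.  If no offset is ever accepted, i reaches
   0 and the cached answer is 0.  */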
2431 /* Check if all small groups should be split. Return true if and
2432 only if:
2434 1) At least one group contains two uses with different offsets.
2435 2) No group contains more than two uses with different offsets.
2437 Return false otherwise. We want to split such groups because:
2439 1) Small groups don't have much benefit and may interfere with
2440 general candidate selection.
2441 2) The size of a problem with only small groups is usually small and
2442 the general algorithm can handle it well.
2444 TODO -- The above claim may not hold when auto increment is supported. */
2446 static bool
2447 split_all_small_groups (struct ivopts_data *data)
2449 bool split_p = false;
2450 unsigned int i, n, distinct;
2451 struct iv_use *pre, *use;
2453 n = n_iv_uses (data);
2454 for (i = 0; i < n; i++)
2456 use = iv_use (data, i);
2457 if (!use->next)
2458 continue;
2460 distinct = 1;
2461 gcc_assert (use->type == USE_ADDRESS);
2462 for (pre = use, use = use->next; use; pre = use, use = use->next)
2464 if (pre->addr_offset != use->addr_offset)
2465 distinct++;
2467 if (distinct > 2)
2468 return false;
2470 if (distinct == 2)
2471 split_p = true;
2474 return split_p;
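/* Worked example (editor's illustration): with the sorted sub-use
   offsets
     group 0: {0, 4}      -> distinct == 2
     group 1: {0, 0, 8}   -> distinct == 2
   the function returns true and the groups get split, whereas a single
   group {0, 4, 8} has distinct == 3 and grouping is kept.  */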
2477 /* For each group of address type uses, this function further groups
2478 these uses according to the maximum offset supported by the target's
2479 [base + offset] addressing mode. */
2481 static void
2482 group_address_uses (struct ivopts_data *data)
2484 HOST_WIDE_INT max_offset = -1;
2485 unsigned int i, n, sub_id;
2486 struct iv_use *pre, *use;
2487 unsigned HOST_WIDE_INT addr_offset_first;
2489 /* Reset max offset to split all small groups. */
2490 if (split_all_small_groups (data))
2491 max_offset = 0;
2493 n = n_iv_uses (data);
2494 for (i = 0; i < n; i++)
2496 use = iv_use (data, i);
2497 if (!use->next)
2498 continue;
2500 gcc_assert (use->type == USE_ADDRESS);
2501 if (max_offset != 0)
2502 max_offset = compute_max_addr_offset (use);
2504 while (use)
2506 sub_id = 0;
2507 addr_offset_first = use->addr_offset;
2508 /* Only uses whose offset relative to the first use fits in the
2509 addressing mode's offset range can be grouped together. */
2510 for (pre = use, use = use->next;
2511 use && (use->addr_offset - addr_offset_first
2512 <= (unsigned HOST_WIDE_INT) max_offset);
2513 pre = use, use = use->next)
2515 use->id = pre->id;
2516 use->sub_id = ++sub_id;
2519 /* Break the list and create a new group. */
2520 if (use)
2522 pre->next = NULL;
2523 use->id = n_iv_uses (data);
2524 use->related_cands = BITMAP_ALLOC (NULL);
2525 data->iv_uses.safe_push (use);
2530 if (dump_file && (dump_flags & TDF_DETAILS))
2531 dump_uses (dump_file, data);
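/* Editor's illustration of the regrouping: assume max_offset is 4095
   and a group holds offsets {0, 8, 4096, 4104}.  Offsets 0 and 8 stay
   together (8 - 0 <= 4095), 4096 breaks the chain, and the tail
   {4096, 4104} is pushed as a brand-new group with its own id.  */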
2534 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2535 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2536 we are at the top-level of the processed address. */
2538 static tree
2539 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2540 HOST_WIDE_INT *offset)
2542 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2543 enum tree_code code;
2544 tree type, orig_type = TREE_TYPE (expr);
2545 HOST_WIDE_INT off0, off1, st;
2546 tree orig_expr = expr;
2548 STRIP_NOPS (expr);
2550 type = TREE_TYPE (expr);
2551 code = TREE_CODE (expr);
2552 *offset = 0;
2554 switch (code)
2556 case INTEGER_CST:
2557 if (!cst_and_fits_in_hwi (expr)
2558 || integer_zerop (expr))
2559 return orig_expr;
2561 *offset = int_cst_value (expr);
2562 return build_int_cst (orig_type, 0);
2564 case POINTER_PLUS_EXPR:
2565 case PLUS_EXPR:
2566 case MINUS_EXPR:
2567 op0 = TREE_OPERAND (expr, 0);
2568 op1 = TREE_OPERAND (expr, 1);
2570 op0 = strip_offset_1 (op0, false, false, &off0);
2571 op1 = strip_offset_1 (op1, false, false, &off1);
2573 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2574 if (op0 == TREE_OPERAND (expr, 0)
2575 && op1 == TREE_OPERAND (expr, 1))
2576 return orig_expr;
2578 if (integer_zerop (op1))
2579 expr = op0;
2580 else if (integer_zerop (op0))
2582 if (code == MINUS_EXPR)
2583 expr = fold_build1 (NEGATE_EXPR, type, op1);
2584 else
2585 expr = op1;
2587 else
2588 expr = fold_build2 (code, type, op0, op1);
2590 return fold_convert (orig_type, expr);
2592 case MULT_EXPR:
2593 op1 = TREE_OPERAND (expr, 1);
2594 if (!cst_and_fits_in_hwi (op1))
2595 return orig_expr;
2597 op0 = TREE_OPERAND (expr, 0);
2598 op0 = strip_offset_1 (op0, false, false, &off0);
2599 if (op0 == TREE_OPERAND (expr, 0))
2600 return orig_expr;
2602 *offset = off0 * int_cst_value (op1);
2603 if (integer_zerop (op0))
2604 expr = op0;
2605 else
2606 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2608 return fold_convert (orig_type, expr);
2610 case ARRAY_REF:
2611 case ARRAY_RANGE_REF:
2612 if (!inside_addr)
2613 return orig_expr;
2615 step = array_ref_element_size (expr);
2616 if (!cst_and_fits_in_hwi (step))
2617 break;
2619 st = int_cst_value (step);
2620 op1 = TREE_OPERAND (expr, 1);
2621 op1 = strip_offset_1 (op1, false, false, &off1);
2622 *offset = off1 * st;
2624 if (top_compref
2625 && integer_zerop (op1))
2627 /* Strip the component reference completely. */
2628 op0 = TREE_OPERAND (expr, 0);
2629 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2630 *offset += off0;
2631 return op0;
2633 break;
2635 case COMPONENT_REF:
2637 tree field;
2639 if (!inside_addr)
2640 return orig_expr;
2642 tmp = component_ref_field_offset (expr);
2643 field = TREE_OPERAND (expr, 1);
2644 if (top_compref
2645 && cst_and_fits_in_hwi (tmp)
2646 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2648 HOST_WIDE_INT boffset, abs_off;
2650 /* Strip the component reference completely. */
2651 op0 = TREE_OPERAND (expr, 0);
2652 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2653 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2654 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2655 if (boffset < 0)
2656 abs_off = -abs_off;
2658 *offset = off0 + int_cst_value (tmp) + abs_off;
2659 return op0;
2662 break;
2664 case ADDR_EXPR:
2665 op0 = TREE_OPERAND (expr, 0);
2666 op0 = strip_offset_1 (op0, true, true, &off0);
2667 *offset += off0;
2669 if (op0 == TREE_OPERAND (expr, 0))
2670 return orig_expr;
2672 expr = build_fold_addr_expr (op0);
2673 return fold_convert (orig_type, expr);
2675 case MEM_REF:
2676 /* ??? Offset operand? */
2677 inside_addr = false;
2678 break;
2680 default:
2681 return orig_expr;
2684 /* Default handling of expressions for which we want to recurse into
2685 the first operand. */
2686 op0 = TREE_OPERAND (expr, 0);
2687 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2688 *offset += off0;
2690 if (op0 == TREE_OPERAND (expr, 0)
2691 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2692 return orig_expr;
2694 expr = copy_node (expr);
2695 TREE_OPERAND (expr, 0) = op0;
2696 if (op1)
2697 TREE_OPERAND (expr, 1) = op1;
2699 /* Inside address, we might strip the top level component references,
2700 thus changing the type of the expression. Handling of ADDR_EXPR
2701 will fix that. */
2702 expr = fold_convert (orig_type, expr);
2704 return expr;
2707 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2709 static tree
2710 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2712 HOST_WIDE_INT off;
2713 tree core = strip_offset_1 (expr, false, false, &off);
2714 *offset = off;
2715 return core;
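/* A worked example of the stripping (editor's sketch): for
   expr = &a[i + 3] with 4-byte elements, strip_offset_1 peels the
   ARRAY_REF index and the inner PLUS_EXPR, returning the core &a[i]
   with *offset == 12.  For a plain constant such as 20, the core is
   the zero constant and *offset becomes 20.  */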
2718 /* Returns a variant of TYPE that can be used as a base for different uses.
2719 We return an unsigned type with the same precision, which avoids problems
2720 with overflows. */
2722 static tree
2723 generic_type_for (tree type)
2725 if (POINTER_TYPE_P (type))
2726 return unsigned_type_for (type);
2728 if (TYPE_UNSIGNED (type))
2729 return type;
2731 return unsigned_type_for (type);
2734 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2735 the bitmap in which we should store them. */
2737 static struct ivopts_data *fd_ivopts_data;
2738 static tree
2739 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2741 bitmap *depends_on = (bitmap *) data;
2742 struct version_info *info;
2744 if (TREE_CODE (*expr_p) != SSA_NAME)
2745 return NULL_TREE;
2746 info = name_info (fd_ivopts_data, *expr_p);
2748 if (!info->inv_id || info->has_nonlin_use)
2749 return NULL_TREE;
2751 if (!*depends_on)
2752 *depends_on = BITMAP_ALLOC (NULL);
2753 bitmap_set_bit (*depends_on, info->inv_id);
2755 return NULL_TREE;
2758 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2759 position to POS. If USE is not NULL, the candidate is set as related to
2760 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2761 replacement of the final value of the iv by a direct computation. */
2763 static struct iv_cand *
2764 add_candidate_1 (struct ivopts_data *data,
2765 tree base, tree step, bool important, enum iv_position pos,
2766 struct iv_use *use, gimple *incremented_at,
2767 struct iv *orig_iv = NULL)
2769 unsigned i;
2770 struct iv_cand *cand = NULL;
2771 tree type, orig_type;
2773 /* For non-original variables, make sure their values are computed in a type
2774 that does not invoke undefined behavior on overflows (since in general,
2775 we cannot prove that these induction variables are non-wrapping). */
2776 if (pos != IP_ORIGINAL)
2778 orig_type = TREE_TYPE (base);
2779 type = generic_type_for (orig_type);
2780 if (type != orig_type)
2782 base = fold_convert (type, base);
2783 step = fold_convert (type, step);
2787 for (i = 0; i < n_iv_cands (data); i++)
2789 cand = iv_cand (data, i);
2791 if (cand->pos != pos)
2792 continue;
2794 if (cand->incremented_at != incremented_at
2795 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2796 && cand->ainc_use != use))
2797 continue;
2799 if (!cand->iv)
2801 if (!base && !step)
2802 break;
2804 continue;
2807 if (!base && !step)
2808 continue;
2810 if (operand_equal_p (base, cand->iv->base, 0)
2811 && operand_equal_p (step, cand->iv->step, 0)
2812 && (TYPE_PRECISION (TREE_TYPE (base))
2813 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2814 break;
2817 if (i == n_iv_cands (data))
2819 cand = XCNEW (struct iv_cand);
2820 cand->id = i;
2822 if (!base && !step)
2823 cand->iv = NULL;
2824 else
2825 cand->iv = alloc_iv (data, base, step);
2827 cand->pos = pos;
2828 if (pos != IP_ORIGINAL && cand->iv)
2830 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2831 cand->var_after = cand->var_before;
2833 cand->important = important;
2834 cand->incremented_at = incremented_at;
2835 data->iv_candidates.safe_push (cand);
2837 if (step
2838 && TREE_CODE (step) != INTEGER_CST)
2840 fd_ivopts_data = data;
2841 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2844 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2845 cand->ainc_use = use;
2846 else
2847 cand->ainc_use = NULL;
2849 cand->orig_iv = orig_iv;
2850 if (dump_file && (dump_flags & TDF_DETAILS))
2851 dump_cand (dump_file, cand);
2854 if (important && !cand->important)
2856 cand->important = true;
2857 if (dump_file && (dump_flags & TDF_DETAILS))
2858 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2861 if (use)
2863 bitmap_set_bit (use->related_cands, i);
2864 if (dump_file && (dump_flags & TDF_DETAILS))
2865 fprintf (dump_file, "Candidate %d is related to use %d\n",
2866 cand->id, use->id);
2869 return cand;
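/* Editor's note: the lookup-then-push logic above deduplicates
   candidates on (pos, incremented_at, base, step, precision).  Asking
   twice for 0 + 4 * i at IP_NORMAL yields the same iv_cand, possibly
   with its important flag upgraded and another use related to it.  */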
2872 /* Returns true if incrementing the induction variable at the end of the LOOP
2873 is allowed.
2875 The purpose is to avoid splitting the latch edge with a biv increment, thus
2876 creating a jump, possibly confusing other optimization passes and leaving
2877 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2878 is not available (so we do not have a better alternative), or if the latch
2879 edge is already nonempty. */
2881 static bool
2882 allow_ip_end_pos_p (struct loop *loop)
2884 if (!ip_normal_pos (loop))
2885 return true;
2887 if (!empty_block_p (ip_end_pos (loop)))
2888 return true;
2890 return false;
2893 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2894 Important field is set to IMPORTANT. */
2896 static void
2897 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2898 bool important, struct iv_use *use)
2900 basic_block use_bb = gimple_bb (use->stmt);
2901 machine_mode mem_mode;
2902 unsigned HOST_WIDE_INT cstepi;
2904 /* If we insert the increment in any position other than the standard
2905 ones, we must ensure that it is incremented once per iteration.
2906 It must not be in an inner nested loop, or one side of an if
2907 statement. */
2908 if (use_bb->loop_father != data->current_loop
2909 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2910 || stmt_could_throw_p (use->stmt)
2911 || !cst_and_fits_in_hwi (step))
2912 return;
2914 cstepi = int_cst_value (step);
2916 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2917 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2918 || USE_STORE_PRE_INCREMENT (mem_mode))
2919 && GET_MODE_SIZE (mem_mode) == cstepi)
2920 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2921 || USE_STORE_PRE_DECREMENT (mem_mode))
2922 && GET_MODE_SIZE (mem_mode) == -cstepi))
2924 enum tree_code code = MINUS_EXPR;
2925 tree new_base;
2926 tree new_step = step;
2928 if (POINTER_TYPE_P (TREE_TYPE (base)))
2930 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2931 code = POINTER_PLUS_EXPR;
2933 else
2934 new_step = fold_convert (TREE_TYPE (base), new_step);
2935 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2936 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2937 use->stmt);
2939 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2940 || USE_STORE_POST_INCREMENT (mem_mode))
2941 && GET_MODE_SIZE (mem_mode) == cstepi)
2942 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2943 || USE_STORE_POST_DECREMENT (mem_mode))
2944 && GET_MODE_SIZE (mem_mode) == -cstepi))
2946 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2947 use->stmt);
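/* Editor's illustration, assuming GET_MODE_SIZE (mem_mode) == 4: for a
   use *p with step 4 on a target with post-increment, the candidate
   (base, step) itself is added at IP_AFTER_USE, matching *p++; with
   pre-increment, a candidate with base - 4 is added at IP_BEFORE_USE,
   so that ++p yields exactly the address being used.  */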
2951 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2952 position to POS. If USE is not NULL, the candidate is set as related to
2953 it. The candidate computation is scheduled before the exit condition and
2954 at the end of the loop. */
2956 static void
2957 add_candidate (struct ivopts_data *data,
2958 tree base, tree step, bool important, struct iv_use *use,
2959 struct iv *orig_iv = NULL)
2961 gcc_assert (use == NULL || use->sub_id == 0);
2963 if (ip_normal_pos (data->current_loop))
2964 add_candidate_1 (data, base, step, important,
2965 IP_NORMAL, use, NULL, orig_iv);
2966 if (ip_end_pos (data->current_loop)
2967 && allow_ip_end_pos_p (data->current_loop))
2968 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2971 /* Adds standard iv candidates. */
2973 static void
2974 add_standard_iv_candidates (struct ivopts_data *data)
2976 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2978 /* The same for a double-integer type if it is still fast enough. */
2979 if (TYPE_PRECISION
2980 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2981 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2982 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2983 build_int_cst (long_integer_type_node, 1), true, NULL);
2985 /* The same for a double-integer type if it is still fast enough. */
2986 if (TYPE_PRECISION
2987 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2988 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2989 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2990 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2994 /* Adds candidates based on the old induction variable IV. */
2996 static void
2997 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2999 gimple *phi;
3000 tree def;
3001 struct iv_cand *cand;
3003 /* Check if this biv is used in an address type use. */
3004 if (iv->no_overflow && iv->have_address_use
3005 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3006 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3008 tree base = fold_convert (sizetype, iv->base);
3009 tree step = fold_convert (sizetype, iv->step);
3011 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3012 add_candidate (data, base, step, true, NULL, iv);
3013 /* Add iv cand of the original type only if it has nonlinear use. */
3014 if (iv->have_use_for)
3015 add_candidate (data, iv->base, iv->step, true, NULL);
3017 else
3018 add_candidate (data, iv->base, iv->step, true, NULL);
3020 /* The same, but with initial value zero. */
3021 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3022 add_candidate (data, size_int (0), iv->step, true, NULL);
3023 else
3024 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3025 iv->step, true, NULL);
3027 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3028 if (gimple_code (phi) == GIMPLE_PHI)
3030 /* Additionally record the possibility of leaving the original iv
3031 untouched. */
3032 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3033 /* Don't add candidate if it's from another PHI node because
3034 it's an affine iv appearing in the form of PEELED_CHREC. */
3035 phi = SSA_NAME_DEF_STMT (def);
3036 if (gimple_code (phi) != GIMPLE_PHI)
3038 cand = add_candidate_1 (data,
3039 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3040 SSA_NAME_DEF_STMT (def));
3041 cand->var_before = iv->ssa_name;
3042 cand->var_after = def;
3044 else
3045 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3049 /* Adds candidates based on the old induction variables. */
3051 static void
3052 add_iv_candidate_for_bivs (struct ivopts_data *data)
3054 unsigned i;
3055 struct iv *iv;
3056 bitmap_iterator bi;
3058 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3060 iv = ver_info (data, i)->iv;
3061 if (iv && iv->biv_p && !integer_zerop (iv->step))
3062 add_iv_candidate_for_biv (data, iv);
3066 /* Adds candidates based on the value of USE's iv. */
3068 static void
3069 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3071 unsigned HOST_WIDE_INT offset;
3072 tree base;
3073 tree basetype;
3074 struct iv *iv = use->iv;
3076 add_candidate (data, iv->base, iv->step, false, use);
3078 /* The same, but with initial value zero. Make such variable important,
3079 since it is generic enough so that possibly many uses may be based
3080 on it. */
3081 basetype = TREE_TYPE (iv->base);
3082 if (POINTER_TYPE_P (basetype))
3083 basetype = sizetype;
3084 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3086 /* Third, try removing the constant offset. Make sure to even
3087 add a candidate for &a[0] vs. (T *)&a. */
3088 base = strip_offset (iv->base, &offset);
3089 if (offset || base != iv->base)
3090 add_candidate (data, base, iv->step, false, use);
3092 /* Finally, add auto-increment candidates. Make such variables
3093 important since other iv uses with the same base object may be
3094 based on them. */
3095 if (use != NULL && use->type == USE_ADDRESS)
3096 add_autoinc_candidates (data, iv->base, iv->step, true, use);
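/* Summary example (editor's note): for an address use whose iv is
   {base = &a + 16, step = 4}, the code above adds the candidates
     &a + 16 + 4 * i   (the use's own iv),
     0 + 4 * i         (zero base, marked important),
     &a + 4 * i        (constant offset 16 stripped),
   plus auto-increment variants where the target supports them.  */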
3099 /* Adds candidates based on the uses. */
3101 static void
3102 add_iv_candidate_for_uses (struct ivopts_data *data)
3104 unsigned i;
3106 for (i = 0; i < n_iv_uses (data); i++)
3108 struct iv_use *use = iv_use (data, i);
3110 if (!use)
3111 continue;
3113 switch (use->type)
3115 case USE_NONLINEAR_EXPR:
3116 case USE_COMPARE:
3117 case USE_ADDRESS:
3118 /* Just add the ivs based on the value of the iv used here. */
3119 add_iv_candidate_for_use (data, use);
3120 break;
3122 default:
3123 gcc_unreachable ();
3128 /* Record important candidates and add them to related_cands bitmaps
3129 if needed. */
3131 static void
3132 record_important_candidates (struct ivopts_data *data)
3134 unsigned i;
3135 struct iv_use *use;
3137 for (i = 0; i < n_iv_cands (data); i++)
3139 struct iv_cand *cand = iv_cand (data, i);
3141 if (cand->important)
3142 bitmap_set_bit (data->important_candidates, i);
3145 data->consider_all_candidates = (n_iv_cands (data)
3146 <= CONSIDER_ALL_CANDIDATES_BOUND);
3148 if (data->consider_all_candidates)
3150 /* We will not need "related_cands" bitmaps in this case,
3151 so release them to decrease peak memory consumption. */
3152 for (i = 0; i < n_iv_uses (data); i++)
3154 use = iv_use (data, i);
3155 BITMAP_FREE (use->related_cands);
3158 else
3160 /* Add important candidates to the related_cands bitmaps. */
3161 for (i = 0; i < n_iv_uses (data); i++)
3162 bitmap_ior_into (iv_use (data, i)->related_cands,
3163 data->important_candidates);
3167 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3168 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3169 we allocate a simple list to every use. */
3171 static void
3172 alloc_use_cost_map (struct ivopts_data *data)
3174 unsigned i, size, s;
3176 for (i = 0; i < n_iv_uses (data); i++)
3178 struct iv_use *use = iv_use (data, i);
3180 if (data->consider_all_candidates)
3181 size = n_iv_cands (data);
3182 else
3184 s = bitmap_count_bits (use->related_cands);
3186 /* Round up to a power of two, so that taking the modulo is fast. */
3187 size = s ? (1 << ceil_log2 (s)) : 1;
3190 use->n_map_members = size;
3191 use->cost_map = XCNEWVEC (struct cost_pair, size);
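/* Editor's sketch of the sizing rule: with 5 related candidates,
   s == 5 and size == 1 << ceil_log2 (5) == 8, so the per-use table
   gets 8 slots and "cand->id & 7" can later serve as the hash.  */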
3195 /* Returns a description of the computation cost of an expression whose
3196 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
3198 static comp_cost
3199 new_cost (unsigned runtime, unsigned complexity)
3201 comp_cost cost;
3203 cost.cost = runtime;
3204 cost.complexity = complexity;
3206 return cost;
3209 /* Returns true if COST is infinite. */
3211 static bool
3212 infinite_cost_p (comp_cost cost)
3214 return cost.cost == INFTY;
3217 /* Adds costs COST1 and COST2. */
3219 static comp_cost
3220 add_costs (comp_cost cost1, comp_cost cost2)
3222 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3223 return infinite_cost;
3225 cost1.cost += cost2.cost;
3226 cost1.complexity += cost2.complexity;
3228 return cost1;
3230 /* Subtracts COST2 from COST1. */
3232 static comp_cost
3233 sub_costs (comp_cost cost1, comp_cost cost2)
3235 cost1.cost -= cost2.cost;
3236 cost1.complexity -= cost2.complexity;
3238 return cost1;
3241 /* Returns a negative number if COST1 < COST2, a positive number if
3242 COST1 > COST2, and 0 if COST1 = COST2. */
3244 static int
3245 compare_costs (comp_cost cost1, comp_cost cost2)
3247 if (cost1.cost == cost2.cost)
3248 return cost1.complexity - cost2.complexity;
3250 return cost1.cost - cost2.cost;
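/* Editor's note on the ordering: costs compare lexicographically,
   runtime cost first and complexity as the tie-breaker, so e.g.
   {cost 4, complexity 1} < {cost 4, complexity 2} < {cost 5,
   complexity 0}.  */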
3253 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3254 on invariants DEPENDS_ON and that the value used in expressing it
3255 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3257 static void
3258 set_use_iv_cost (struct ivopts_data *data,
3259 struct iv_use *use, struct iv_cand *cand,
3260 comp_cost cost, bitmap depends_on, tree value,
3261 enum tree_code comp, int inv_expr_id)
3263 unsigned i, s;
3265 if (infinite_cost_p (cost))
3267 BITMAP_FREE (depends_on);
3268 return;
3271 if (data->consider_all_candidates)
3273 use->cost_map[cand->id].cand = cand;
3274 use->cost_map[cand->id].cost = cost;
3275 use->cost_map[cand->id].depends_on = depends_on;
3276 use->cost_map[cand->id].value = value;
3277 use->cost_map[cand->id].comp = comp;
3278 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3279 return;
3282 /* n_map_members is a power of two, so this computes modulo. */
3283 s = cand->id & (use->n_map_members - 1);
3284 for (i = s; i < use->n_map_members; i++)
3285 if (!use->cost_map[i].cand)
3286 goto found;
3287 for (i = 0; i < s; i++)
3288 if (!use->cost_map[i].cand)
3289 goto found;
3291 gcc_unreachable ();
3293 found:
3294 use->cost_map[i].cand = cand;
3295 use->cost_map[i].cost = cost;
3296 use->cost_map[i].depends_on = depends_on;
3297 use->cost_map[i].value = value;
3298 use->cost_map[i].comp = comp;
3299 use->cost_map[i].inv_expr_id = inv_expr_id;
3302 /* Gets cost of (USE, CANDIDATE) pair. */
3304 static struct cost_pair *
3305 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3306 struct iv_cand *cand)
3308 unsigned i, s;
3309 struct cost_pair *ret;
3311 if (!cand)
3312 return NULL;
3314 if (data->consider_all_candidates)
3316 ret = use->cost_map + cand->id;
3317 if (!ret->cand)
3318 return NULL;
3320 return ret;
3323 /* n_map_members is a power of two, so this computes modulo. */
3324 s = cand->id & (use->n_map_members - 1);
3325 for (i = s; i < use->n_map_members; i++)
3326 if (use->cost_map[i].cand == cand)
3327 return use->cost_map + i;
3328 else if (use->cost_map[i].cand == NULL)
3329 return NULL;
3330 for (i = 0; i < s; i++)
3331 if (use->cost_map[i].cand == cand)
3332 return use->cost_map + i;
3333 else if (use->cost_map[i].cand == NULL)
3334 return NULL;
3336 return NULL;
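/* Editor's illustration of the open-addressing probe above: with
   n_map_members == 8 and cand->id == 13, the scan starts at slot
   13 & 7 == 5, walks slots 5..7 and then 0..4, and an empty slot
   means the pair was never recorded (its cost was infinite).  */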
3339 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3340 static rtx
3341 produce_memory_decl_rtl (tree obj, int *regno)
3343 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3344 machine_mode address_mode = targetm.addr_space.address_mode (as);
3345 rtx x;
3347 gcc_assert (obj);
3348 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3350 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3351 x = gen_rtx_SYMBOL_REF (address_mode, name);
3352 SET_SYMBOL_REF_DECL (x, obj);
3353 x = gen_rtx_MEM (DECL_MODE (obj), x);
3354 set_mem_addr_space (x, as);
3355 targetm.encode_section_info (obj, x, true);
3357 else
3359 x = gen_raw_REG (address_mode, (*regno)++);
3360 x = gen_rtx_MEM (DECL_MODE (obj), x);
3361 set_mem_addr_space (x, as);
3364 return x;
3367 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3368 walk_tree. DATA contains the actual fake register number. */
3370 static tree
3371 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3373 tree obj = NULL_TREE;
3374 rtx x = NULL_RTX;
3375 int *regno = (int *) data;
3377 switch (TREE_CODE (*expr_p))
3379 case ADDR_EXPR:
3380 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3381 handled_component_p (*expr_p);
3382 expr_p = &TREE_OPERAND (*expr_p, 0))
3383 continue;
3384 obj = *expr_p;
3385 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3386 x = produce_memory_decl_rtl (obj, regno);
3387 break;
3389 case SSA_NAME:
3390 *ws = 0;
3391 obj = SSA_NAME_VAR (*expr_p);
3392 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3393 if (!obj)
3394 return NULL_TREE;
3395 if (!DECL_RTL_SET_P (obj))
3396 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3397 break;
3399 case VAR_DECL:
3400 case PARM_DECL:
3401 case RESULT_DECL:
3402 *ws = 0;
3403 obj = *expr_p;
3405 if (DECL_RTL_SET_P (obj))
3406 break;
3408 if (DECL_MODE (obj) == BLKmode)
3409 x = produce_memory_decl_rtl (obj, regno);
3410 else
3411 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3413 break;
3415 default:
3416 break;
3419 if (x)
3421 decl_rtl_to_reset.safe_push (obj);
3422 SET_DECL_RTL (obj, x);
3425 return NULL_TREE;
3428 /* Determines cost of the computation of EXPR. */
3430 static unsigned
3431 computation_cost (tree expr, bool speed)
3433 rtx_insn *seq;
3434 rtx rslt;
3435 tree type = TREE_TYPE (expr);
3436 unsigned cost;
3437 /* Avoid using hard regs in ways which may be unsupported. */
3438 int regno = LAST_VIRTUAL_REGISTER + 1;
3439 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3440 enum node_frequency real_frequency = node->frequency;
3442 node->frequency = NODE_FREQUENCY_NORMAL;
3443 crtl->maybe_hot_insn_p = speed;
3444 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3445 start_sequence ();
3446 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3447 seq = get_insns ();
3448 end_sequence ();
3449 default_rtl_profile ();
3450 node->frequency = real_frequency;
3452 cost = seq_cost (seq, speed);
3453 if (MEM_P (rslt))
3454 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3455 TYPE_ADDR_SPACE (type), speed);
3456 else if (!REG_P (rslt))
3457 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3459 return cost;
3462 /* Returns variable containing the value of candidate CAND at statement AT. */
3464 static tree
3465 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3467 if (stmt_after_increment (loop, cand, stmt))
3468 return cand->var_after;
3469 else
3470 return cand->var_before;
3473 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3474 same precision that is at least as wide as the precision of TYPE, stores
3475 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3476 type of A and B. */
3478 static tree
3479 determine_common_wider_type (tree *a, tree *b)
3481 tree wider_type = NULL;
3482 tree suba, subb;
3483 tree atype = TREE_TYPE (*a);
3485 if (CONVERT_EXPR_P (*a))
3487 suba = TREE_OPERAND (*a, 0);
3488 wider_type = TREE_TYPE (suba);
3489 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3490 return atype;
3492 else
3493 return atype;
3495 if (CONVERT_EXPR_P (*b))
3497 subb = TREE_OPERAND (*b, 0);
3498 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3499 return atype;
3501 else
3502 return atype;
3504 *a = suba;
3505 *b = subb;
3506 return wider_type;
3509 /* Determines the expression by which USE is expressed from induction variable
3510 CAND at statement AT in LOOP. The expression is stored in a decomposed
3511 form into AFF. Returns false if USE cannot be expressed using CAND. */
3513 static bool
3514 get_computation_aff (struct loop *loop,
3515 struct iv_use *use, struct iv_cand *cand, gimple *at,
3516 struct aff_tree *aff)
3518 tree ubase = use->iv->base;
3519 tree ustep = use->iv->step;
3520 tree cbase = cand->iv->base;
3521 tree cstep = cand->iv->step, cstep_common;
3522 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3523 tree common_type, var;
3524 tree uutype;
3525 aff_tree cbase_aff, var_aff;
3526 widest_int rat;
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3530 /* We do not have a precision to express the values of use. */
3531 return false;
3534 var = var_at_stmt (loop, cand, at);
3535 uutype = unsigned_type_for (utype);
3537 /* If the conversion is not noop, perform it. */
3538 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3540 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3541 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3543 tree inner_base, inner_step, inner_type;
3544 inner_base = TREE_OPERAND (cbase, 0);
3545 if (CONVERT_EXPR_P (cstep))
3546 inner_step = TREE_OPERAND (cstep, 0);
3547 else
3548 inner_step = cstep;
3550 inner_type = TREE_TYPE (inner_base);
3551 /* If the candidate is added from a biv whose type is smaller than
3552 ctype, we know both the candidate and the biv won't overflow.
3553 In this case, it's safe to skip the conversion in the candidate.
3554 As an example, (unsigned short)((unsigned long)A) equals
3555 (unsigned short)A, if A has a type no larger than short. */
3556 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3558 cbase = inner_base;
3559 cstep = inner_step;
3562 cstep = fold_convert (uutype, cstep);
3563 cbase = fold_convert (uutype, cbase);
3564 var = fold_convert (uutype, var);
3567 if (!constant_multiple_of (ustep, cstep, &rat))
3568 return false;
3570 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3571 type, we achieve better folding by computing their difference in this
3572 wider type, and casting the result to UUTYPE. We do not need to worry
3573 about overflows, as all the arithmetic will in the end be performed in
3574 UUTYPE anyway. */
3575 common_type = determine_common_wider_type (&ubase, &cbase);
3577 /* use = ubase - ratio * cbase + ratio * var. */
3578 tree_to_aff_combination (ubase, common_type, aff);
3579 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3580 tree_to_aff_combination (var, uutype, &var_aff);
3582 /* We need to shift the value if we are after the increment. */
3583 if (stmt_after_increment (loop, cand, at))
3585 aff_tree cstep_aff;
3587 if (common_type != uutype)
3588 cstep_common = fold_convert (common_type, cstep);
3589 else
3590 cstep_common = cstep;
3592 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3593 aff_combination_add (&cbase_aff, &cstep_aff);
3596 aff_combination_scale (&cbase_aff, -rat);
3597 aff_combination_add (aff, &cbase_aff);
3598 if (common_type != uutype)
3599 aff_combination_convert (aff, uutype);
3601 aff_combination_scale (&var_aff, rat);
3602 aff_combination_add (aff, &var_aff);
3604 return true;
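/* Worked example (editor's sketch): let the use iv be {ubase = &a,
   ustep = 4} and the candidate iv be {cbase = 0, cstep = 1}, so
   rat == 4.  Then

     use = ubase - rat * cbase + rat * var == &a + 4 * var

   and when AT lies after the increment, cstep is first added to cbase
   so the already-incremented variable still yields the same value.  */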
3607 /* Return the type of USE. */
3609 static tree
3610 get_use_type (struct iv_use *use)
3612 tree base_type = TREE_TYPE (use->iv->base);
3613 tree type;
3615 if (use->type == USE_ADDRESS)
3617 /* The base_type may be a void pointer. Create a pointer type based on
3618 the mem_ref instead. */
3619 type = build_pointer_type (TREE_TYPE (*use->op_p));
3620 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3621 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3623 else
3624 type = base_type;
3626 return type;
3629 /* Determines the expression by which USE is expressed from induction variable
3630 CAND at statement AT in LOOP. The computation is unshared. */
3632 static tree
3633 get_computation_at (struct loop *loop,
3634 struct iv_use *use, struct iv_cand *cand, gimple *at)
3636 aff_tree aff;
3637 tree type = get_use_type (use);
3639 if (!get_computation_aff (loop, use, cand, at, &aff))
3640 return NULL_TREE;
3641 unshare_aff_combination (&aff);
3642 return fold_convert (type, aff_combination_to_tree (&aff));
3645 /* Determines the expression by which USE is expressed from induction variable
3646 CAND in LOOP. The computation is unshared. */
3648 static tree
3649 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3651 return get_computation_at (loop, use, cand, use->stmt);
3654 /* Adjust the cost COST for being in loop setup rather than loop body.
3655 If we're optimizing for space, the loop setup overhead is constant;
3656 if we're optimizing for speed, amortize it over the per-iteration cost. */
3657 static unsigned
3658 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3660 if (cost == INFTY)
3661 return cost;
3662 else if (optimize_loop_for_speed_p (data->current_loop))
3663 return cost / avg_loop_niter (data->current_loop);
3664 else
3665 return cost;
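/* Editor's example: with an average of 10 iterations and a setup cost
   of 30, optimizing for speed amortizes the setup to 30 / 10 == 3 per
   iteration, while when optimizing for size the full 30 is charged.  */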
3668 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3669 validity for a memory reference accessing memory of mode MODE in
3670 address space AS. */
3673 bool
3674 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3675 addr_space_t as)
3677 #define MAX_RATIO 128
3678 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3679 static vec<sbitmap> valid_mult_list;
3680 sbitmap valid_mult;
3682 if (data_index >= valid_mult_list.length ())
3683 valid_mult_list.safe_grow_cleared (data_index + 1);
3685 valid_mult = valid_mult_list[data_index];
3686 if (!valid_mult)
3688 machine_mode address_mode = targetm.addr_space.address_mode (as);
3689 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3690 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3691 rtx addr, scaled;
3692 HOST_WIDE_INT i;
3694 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3695 bitmap_clear (valid_mult);
3696 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3697 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3698 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3700 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3701 if (memory_address_addr_space_p (mode, addr, as)
3702 || memory_address_addr_space_p (mode, scaled, as))
3703 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3706 if (dump_file && (dump_flags & TDF_DETAILS))
3708 fprintf (dump_file, " allowed multipliers:");
3709 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3710 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3711 fprintf (dump_file, " %d", (int) i);
3712 fprintf (dump_file, "\n");
3713 fprintf (dump_file, "\n");
3716 valid_mult_list[data_index] = valid_mult;
3719 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3720 return false;
3722 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
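/* Usage sketch (editor's assumption about the target): on an x86-like
   machine the probe accepts ratios 1, 2, 4 and 8 for common integer
   modes, so multiplier_allowed_in_address_p (4, SImode, as) comes back
   true while a ratio of 3 does not.  Results are memoized per
   (address space, mode) pair in valid_mult_list.  */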
3725 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3726 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3727 variable is omitted. Compute the cost for a memory reference that accesses
3728 a memory location of mode MEM_MODE in address space AS.
3730 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3731 size of MEM_MODE / RATIO) is available. To make this determination, we
3732 look at the size of the increment to be made, which is given in CSTEP.
3733 CSTEP may be zero if the step is unknown.
3734 STMT_AFTER_INC is true iff the statement we're looking at is after the
3735 increment of the original biv.
3737 TODO -- there must be some better way. This all is quite crude. */
3739 enum ainc_type
3741 AINC_PRE_INC, /* Pre increment. */
3742 AINC_PRE_DEC, /* Pre decrement. */
3743 AINC_POST_INC, /* Post increment. */
3744 AINC_POST_DEC, /* Post decrement. */
3745 AINC_NONE /* Also the number of auto increment types. */
3748 struct address_cost_data
3750 HOST_WIDE_INT min_offset, max_offset;
3751 unsigned costs[2][2][2][2];
3752 unsigned ainc_costs[AINC_NONE];
3756 static comp_cost
3757 get_address_cost (bool symbol_present, bool var_present,
3758 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3759 HOST_WIDE_INT cstep, machine_mode mem_mode,
3760 addr_space_t as, bool speed,
3761 bool stmt_after_inc, bool *may_autoinc)
3763 machine_mode address_mode = targetm.addr_space.address_mode (as);
3764 static vec<address_cost_data *> address_cost_data_list;
3765 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3766 address_cost_data *data;
3767 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3768 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3769 unsigned cost, acost, complexity;
3770 enum ainc_type autoinc_type;
3771 bool offset_p, ratio_p, autoinc;
3772 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3773 unsigned HOST_WIDE_INT mask;
3774 unsigned bits;
3776 if (data_index >= address_cost_data_list.length ())
3777 address_cost_data_list.safe_grow_cleared (data_index + 1);
3779 data = address_cost_data_list[data_index];
3780 if (!data)
3782 HOST_WIDE_INT i;
3783 HOST_WIDE_INT rat, off = 0;
3784 int old_cse_not_expected, width;
3785 unsigned sym_p, var_p, off_p, rat_p, add_c;
3786 rtx_insn *seq;
3787 rtx addr, base;
3788 rtx reg0, reg1;
3790 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3792 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3794 width = GET_MODE_BITSIZE (address_mode) - 1;
3795 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3796 width = HOST_BITS_PER_WIDE_INT - 1;
3797 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3799 for (i = width; i >= 0; i--)
3801 off = -((unsigned HOST_WIDE_INT) 1 << i);
3802 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3803 if (memory_address_addr_space_p (mem_mode, addr, as))
3804 break;
3806 data->min_offset = (i == -1? 0 : off);
3808 for (i = width; i >= 0; i--)
3810 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3811 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3812 if (memory_address_addr_space_p (mem_mode, addr, as))
3813 break;
3814 /* For some strict-alignment targets, the offset must be naturally
3815 aligned. Try an aligned offset if mem_mode is not QImode. */
3816 off = mem_mode != QImode
3817 ? ((unsigned HOST_WIDE_INT) 1 << i)
3818 - GET_MODE_SIZE (mem_mode)
3819 : 0;
3820 if (off > 0)
3822 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3823 if (memory_address_addr_space_p (mem_mode, addr, as))
3824 break;
3827 if (i == -1)
3828 off = 0;
3829 data->max_offset = off;
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, "get_address_cost:\n");
3834 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3835 GET_MODE_NAME (mem_mode),
3836 data->min_offset);
3837 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3838 GET_MODE_NAME (mem_mode),
3839 data->max_offset);
3842 rat = 1;
3843 for (i = 2; i <= MAX_RATIO; i++)
3844 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3846 rat = i;
3847 break;
3850 /* Compute the cost of various addressing modes. */
3851 acost = 0;
3852 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3853 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3855 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3856 || USE_STORE_PRE_DECREMENT (mem_mode))
3858 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3859 has_predec[mem_mode]
3860 = memory_address_addr_space_p (mem_mode, addr, as);
3862 if (has_predec[mem_mode])
3863 data->ainc_costs[AINC_PRE_DEC]
3864 = address_cost (addr, mem_mode, as, speed);
3866 if (USE_LOAD_POST_DECREMENT (mem_mode)
3867 || USE_STORE_POST_DECREMENT (mem_mode))
3869 addr = gen_rtx_POST_DEC (address_mode, reg0);
3870 has_postdec[mem_mode]
3871 = memory_address_addr_space_p (mem_mode, addr, as);
3873 if (has_postdec[mem_mode])
3874 data->ainc_costs[AINC_POST_DEC]
3875 = address_cost (addr, mem_mode, as, speed);
3877 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3878 || USE_STORE_PRE_INCREMENT (mem_mode))
3880 addr = gen_rtx_PRE_INC (address_mode, reg0);
3881 has_preinc[mem_mode]
3882 = memory_address_addr_space_p (mem_mode, addr, as);
3884 if (has_preinc[mem_mode])
3885 data->ainc_costs[AINC_PRE_INC]
3886 = address_cost (addr, mem_mode, as, speed);
3888 if (USE_LOAD_POST_INCREMENT (mem_mode)
3889 || USE_STORE_POST_INCREMENT (mem_mode))
3891 addr = gen_rtx_POST_INC (address_mode, reg0);
3892 has_postinc[mem_mode]
3893 = memory_address_addr_space_p (mem_mode, addr, as);
3895 if (has_postinc[mem_mode])
3896 data->ainc_costs[AINC_POST_INC]
3897 = address_cost (addr, mem_mode, as, speed);
3899 for (i = 0; i < 16; i++)
3901 sym_p = i & 1;
3902 var_p = (i >> 1) & 1;
3903 off_p = (i >> 2) & 1;
3904 rat_p = (i >> 3) & 1;
3906 addr = reg0;
3907 if (rat_p)
3908 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3909 gen_int_mode (rat, address_mode));
3911 if (var_p)
3912 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3914 if (sym_p)
3916 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3917 /* ??? We can run into trouble with some backends by presenting
3918 it with symbols which haven't been properly passed through
3919 targetm.encode_section_info. By setting the local bit, we
3920 enhance the probability of things working. */
3921 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3923 if (off_p)
3924 base = gen_rtx_fmt_e (CONST, address_mode,
3925 gen_rtx_fmt_ee
3926 (PLUS, address_mode, base,
3927 gen_int_mode (off, address_mode)));
3929 else if (off_p)
3930 base = gen_int_mode (off, address_mode);
3931 else
3932 base = NULL_RTX;
3934 if (base)
3935 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3937 start_sequence ();
3938 /* To avoid splitting addressing modes, pretend that no cse will
3939 follow. */
3940 old_cse_not_expected = cse_not_expected;
3941 cse_not_expected = true;
3942 addr = memory_address_addr_space (mem_mode, addr, as);
3943 cse_not_expected = old_cse_not_expected;
3944 seq = get_insns ();
3945 end_sequence ();
3947 acost = seq_cost (seq, speed);
3948 acost += address_cost (addr, mem_mode, as, speed);
3950 if (!acost)
3951 acost = 1;
3952 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3955 /* On some targets, it is quite expensive to load a symbol into a register,
3956 which makes addresses that contain symbols look much more expensive.
3957 However, the symbol will have to be loaded in any case before the
3958 loop (and quite likely we have it in a register already), so it does not
3959 make much sense to penalize them too heavily. So make some final
3960 tweaks for the SYMBOL_PRESENT modes:
3962 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3963 var is cheaper, use this mode with a small penalty.
3964 If VAR_PRESENT is true, try whether the mode with
3965 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3966 and if this is the case, use it. */
3967 add_c = add_cost (speed, address_mode);
3968 for (i = 0; i < 8; i++)
3970 var_p = i & 1;
3971 off_p = (i >> 1) & 1;
3972 rat_p = (i >> 2) & 1;
3974 acost = data->costs[0][1][off_p][rat_p] + 1;
3975 if (var_p)
3976 acost += add_c;
3978 if (acost < data->costs[1][var_p][off_p][rat_p])
3979 data->costs[1][var_p][off_p][rat_p] = acost;
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, "Address costs:\n");
3986 for (i = 0; i < 16; i++)
3988 sym_p = i & 1;
3989 var_p = (i >> 1) & 1;
3990 off_p = (i >> 2) & 1;
3991 rat_p = (i >> 3) & 1;
3993 fprintf (dump_file, " ");
3994 if (sym_p)
3995 fprintf (dump_file, "sym + ");
3996 if (var_p)
3997 fprintf (dump_file, "var + ");
3998 if (off_p)
3999 fprintf (dump_file, "cst + ");
4000 if (rat_p)
4001 fprintf (dump_file, "rat * ");
4003 acost = data->costs[sym_p][var_p][off_p][rat_p];
4004 fprintf (dump_file, "index costs %d\n", acost);
4006 if (has_predec[mem_mode] || has_postdec[mem_mode]
4007 || has_preinc[mem_mode] || has_postinc[mem_mode])
4008 fprintf (dump_file, " May include autoinc/dec\n");
4009 fprintf (dump_file, "\n");
4012 address_cost_data_list[data_index] = data;
4015 bits = GET_MODE_BITSIZE (address_mode);
4016 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4017 offset &= mask;
4018 if ((offset >> (bits - 1) & 1))
4019 offset |= ~mask;
4020 s_offset = offset;
4022 autoinc = false;
4023 autoinc_type = AINC_NONE;
4024 msize = GET_MODE_SIZE (mem_mode);
4025 autoinc_offset = offset;
4026 if (stmt_after_inc)
4027 autoinc_offset += ratio * cstep;
4028 if (symbol_present || var_present || ratio != 1)
4029 autoinc = false;
4030 else
4032 if (has_postinc[mem_mode] && autoinc_offset == 0
4033 && msize == cstep)
4034 autoinc_type = AINC_POST_INC;
4035 else if (has_postdec[mem_mode] && autoinc_offset == 0
4036 && msize == -cstep)
4037 autoinc_type = AINC_POST_DEC;
4038 else if (has_preinc[mem_mode] && autoinc_offset == msize
4039 && msize == cstep)
4040 autoinc_type = AINC_PRE_INC;
4041 else if (has_predec[mem_mode] && autoinc_offset == -msize
4042 && msize == -cstep)
4043 autoinc_type = AINC_PRE_DEC;
4045 if (autoinc_type != AINC_NONE)
4046 autoinc = true;
4049 cost = 0;
4050 offset_p = (s_offset != 0
4051 && data->min_offset <= s_offset
4052 && s_offset <= data->max_offset);
4053 ratio_p = (ratio != 1
4054 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4056 if (ratio != 1 && !ratio_p)
4057 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4059 if (s_offset && !offset_p && !symbol_present)
4060 cost += add_cost (speed, address_mode);
4062 if (may_autoinc)
4063 *may_autoinc = autoinc;
4064 if (autoinc)
4065 acost = data->ainc_costs[autoinc_type];
4066 else
4067 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4068 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4069 return new_cost (cost + acost, complexity);
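/* Editor's illustration of the final lookup: an address of the shape
   sym + off + 4 * index with the offset in range maps to
   data->costs[1][0][1][1] with complexity 3, while a matched
   post-increment (no symbol or var, offset 0, ratio 1, msize == cstep)
   is charged data->ainc_costs[AINC_POST_INC] instead.  */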
4072 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4073 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4074 calculating the operands of EXPR. Returns true if successful, and returns
4075 the cost in COST. */
4077 static bool
4078 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4079 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4081 comp_cost res;
4082 tree op1 = TREE_OPERAND (expr, 1);
4083 tree cst = TREE_OPERAND (mult, 1);
4084 tree multop = TREE_OPERAND (mult, 0);
4085 int m = exact_log2 (int_cst_value (cst));
4086 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4087 int as_cost, sa_cost;
4088 bool mult_in_op1;
4090 if (!(m >= 0 && m < maxm))
4091 return false;
4093 STRIP_NOPS (op1);
4094 mult_in_op1 = operand_equal_p (op1, mult, 0);
4096 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4098 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4099 use that in preference to a shift insn followed by an add insn. */
4100 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4101 ? shiftadd_cost (speed, mode, m)
4102 : (mult_in_op1
4103 ? shiftsub1_cost (speed, mode, m)
4104 : shiftsub0_cost (speed, mode, m)));
4106 res = new_cost (MIN (as_cost, sa_cost), 0);
4107 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4109 STRIP_NOPS (multop);
4110 if (!is_gimple_val (multop))
4111 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4113 *cost = res;
4114 return true;
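/* Editor's sketch: for expr = b + a * 4, mult is a * 4 and m == 2; the
   choice is between "shift left by 2, then add" (as_cost) and a single
   shift-add instruction (sa_cost), and the cheaper of the two plus the
   cost of the other operand becomes *cost.  */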
4117 /* Estimates cost of forcing expression EXPR into a variable. */
4119 static comp_cost
4120 force_expr_to_var_cost (tree expr, bool speed)
4122 static bool costs_initialized = false;
4123 static unsigned integer_cost [2];
4124 static unsigned symbol_cost [2];
4125 static unsigned address_cost [2];
4126 tree op0, op1;
4127 comp_cost cost0, cost1, cost;
4128 machine_mode mode;
4130 if (!costs_initialized)
4132 tree type = build_pointer_type (integer_type_node);
4133 tree var, addr;
4134 rtx x;
4135 int i;
4137 var = create_tmp_var_raw (integer_type_node, "test_var");
4138 TREE_STATIC (var) = 1;
4139 x = produce_memory_decl_rtl (var, NULL);
4140 SET_DECL_RTL (var, x);
4142 addr = build1 (ADDR_EXPR, type, var);
4145 for (i = 0; i < 2; i++)
4147 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4148 2000), i);
4150 symbol_cost[i] = computation_cost (addr, i) + 1;
4152 address_cost[i]
4153 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4157 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4158 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4159 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4160 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4161 fprintf (dump_file, "\n");
4165 costs_initialized = true;
4168 STRIP_NOPS (expr);
4170 if (SSA_VAR_P (expr))
4171 return no_cost;
4173 if (is_gimple_min_invariant (expr))
4175 if (TREE_CODE (expr) == INTEGER_CST)
4176 return new_cost (integer_cost [speed], 0);
4178 if (TREE_CODE (expr) == ADDR_EXPR)
4180 tree obj = TREE_OPERAND (expr, 0);
4182 if (TREE_CODE (obj) == VAR_DECL
4183 || TREE_CODE (obj) == PARM_DECL
4184 || TREE_CODE (obj) == RESULT_DECL)
4185 return new_cost (symbol_cost [speed], 0);
4188 return new_cost (address_cost [speed], 0);
4191 switch (TREE_CODE (expr))
4193 case POINTER_PLUS_EXPR:
4194 case PLUS_EXPR:
4195 case MINUS_EXPR:
4196 case MULT_EXPR:
4197 op0 = TREE_OPERAND (expr, 0);
4198 op1 = TREE_OPERAND (expr, 1);
4199 STRIP_NOPS (op0);
4200 STRIP_NOPS (op1);
4201 break;
4203 CASE_CONVERT:
4204 case NEGATE_EXPR:
4205 op0 = TREE_OPERAND (expr, 0);
4206 STRIP_NOPS (op0);
4207 op1 = NULL_TREE;
4208 break;
4210 default:
4211 /* Just an arbitrary value, FIXME. */
4212 return new_cost (target_spill_cost[speed], 0);
4215 if (op0 == NULL_TREE
4216 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4217 cost0 = no_cost;
4218 else
4219 cost0 = force_expr_to_var_cost (op0, speed);
4221 if (op1 == NULL_TREE
4222 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4223 cost1 = no_cost;
4224 else
4225 cost1 = force_expr_to_var_cost (op1, speed);
4227 mode = TYPE_MODE (TREE_TYPE (expr));
4228 switch (TREE_CODE (expr))
4230 case POINTER_PLUS_EXPR:
4231 case PLUS_EXPR:
4232 case MINUS_EXPR:
4233 case NEGATE_EXPR:
4234 cost = new_cost (add_cost (speed, mode), 0);
4235 if (TREE_CODE (expr) != NEGATE_EXPR)
4237 tree mult = NULL_TREE;
4238 comp_cost sa_cost;
4239 if (TREE_CODE (op1) == MULT_EXPR)
4240 mult = op1;
4241 else if (TREE_CODE (op0) == MULT_EXPR)
4242 mult = op0;
4244 if (mult != NULL_TREE
4245 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4246 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4247 speed, &sa_cost))
4248 return sa_cost;
4250 break;
4252 CASE_CONVERT:
4254 tree inner_mode, outer_mode;
4255 outer_mode = TREE_TYPE (expr);
4256 inner_mode = TREE_TYPE (op0);
4257 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4258 TYPE_MODE (inner_mode), speed), 0);
4260 break;
4262 case MULT_EXPR:
4263 if (cst_and_fits_in_hwi (op0))
4264 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4265 mode, speed), 0);
4266 else if (cst_and_fits_in_hwi (op1))
4267 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4268 mode, speed), 0);
4269 else
4270 return new_cost (target_spill_cost [speed], 0);
4271 break;
4273 default:
4274 gcc_unreachable ();
4277 cost = add_costs (cost, cost0);
4278 cost = add_costs (cost, cost1);
4280 /* Bound the cost by target_spill_cost. The parts of complicated
4281 computations often are either loop invariant or at least can
4282 be shared between several iv uses, so letting this grow without
4283 limits would not give reasonable results. */
4284 if (cost.cost > (int) target_spill_cost [speed])
4285 cost.cost = target_spill_cost [speed];
4287 return cost;
4290 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4291 invariants the computation depends on. */
4293 static comp_cost
4294 force_var_cost (struct ivopts_data *data,
4295 tree expr, bitmap *depends_on)
4297 if (depends_on)
4299 fd_ivopts_data = data;
4300 walk_tree (&expr, find_depends, depends_on, NULL);
4303 return force_expr_to_var_cost (expr, data->speed);
4306 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4307 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
4308 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4309 invariants the computation depends on. */
4311 static comp_cost
4312 split_address_cost (struct ivopts_data *data,
4313 tree addr, bool *symbol_present, bool *var_present,
4314 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4316 tree core;
4317 HOST_WIDE_INT bitsize;
4318 HOST_WIDE_INT bitpos;
4319 tree toffset;
4320 machine_mode mode;
4321 int unsignedp, reversep, volatilep;
4323 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4324 &unsignedp, &reversep, &volatilep, false);
4326 if (toffset != 0
4327 || bitpos % BITS_PER_UNIT != 0
4328 || reversep
4329 || TREE_CODE (core) != VAR_DECL)
4331 *symbol_present = false;
4332 *var_present = true;
4333 fd_ivopts_data = data;
4334 if (depends_on)
4335 walk_tree (&addr, find_depends, depends_on, NULL);
4337 return new_cost (target_spill_cost[data->speed], 0);
4340 *offset += bitpos / BITS_PER_UNIT;
4341 if (TREE_STATIC (core)
4342 || DECL_EXTERNAL (core))
4344 *symbol_present = true;
4345 *var_present = false;
4346 return no_cost;
4349 *symbol_present = false;
4350 *var_present = true;
4351 return no_cost;
4354 /* Estimates cost of expressing difference of addresses E1 - E2 as
4355 var + symbol + offset. The value of offset is added to OFFSET;
4356 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4357 part is missing. DEPENDS_ON is a set of the invariants the computation
4358 depends on. */
4360 static comp_cost
4361 ptr_difference_cost (struct ivopts_data *data,
4362 tree e1, tree e2, bool *symbol_present, bool *var_present,
4363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4365 HOST_WIDE_INT diff = 0;
4366 aff_tree aff_e1, aff_e2;
4367 tree type;
4369 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4371 if (ptr_difference_const (e1, e2, &diff))
4373 *offset += diff;
4374 *symbol_present = false;
4375 *var_present = false;
4376 return no_cost;
4379 if (integer_zerop (e2))
4380 return split_address_cost (data, TREE_OPERAND (e1, 0),
4381 symbol_present, var_present, offset, depends_on);
4383 *symbol_present = false;
4384 *var_present = true;
4386 type = signed_type_for (TREE_TYPE (e1));
4387 tree_to_aff_combination (e1, type, &aff_e1);
4388 tree_to_aff_combination (e2, type, &aff_e2);
4389 aff_combination_scale (&aff_e2, -1);
4390 aff_combination_add (&aff_e1, &aff_e2);
4392 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4395 /* Estimates cost of expressing difference E1 - E2 as
4396 var + symbol + offset. The value of offset is added to OFFSET;
4397 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4398 part is missing. DEPENDS_ON is a set of the invariants the computation
4399 depends on. */
4401 static comp_cost
4402 difference_cost (struct ivopts_data *data,
4403 tree e1, tree e2, bool *symbol_present, bool *var_present,
4404 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4406 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4407 unsigned HOST_WIDE_INT off1, off2;
4408 aff_tree aff_e1, aff_e2;
4409 tree type;
4411 e1 = strip_offset (e1, &off1);
4412 e2 = strip_offset (e2, &off2);
4413 *offset += off1 - off2;
4415 STRIP_NOPS (e1);
4416 STRIP_NOPS (e2);
4418 if (TREE_CODE (e1) == ADDR_EXPR)
4419 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4420 offset, depends_on);
4421 *symbol_present = false;
4423 if (operand_equal_p (e1, e2, 0))
4425 *var_present = false;
4426 return no_cost;
4429 *var_present = true;
4431 if (integer_zerop (e2))
4432 return force_var_cost (data, e1, depends_on);
4434 if (integer_zerop (e1))
4436 comp_cost cost = force_var_cost (data, e2, depends_on);
4437 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4438 return cost;
4441 type = signed_type_for (TREE_TYPE (e1));
4442 tree_to_aff_combination (e1, type, &aff_e1);
4443 tree_to_aff_combination (e2, type, &aff_e2);
4444 aff_combination_scale (&aff_e2, -1);
4445 aff_combination_add (&aff_e1, &aff_e2);
4447 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4450 /* Returns true if AFF1 and AFF2 are identical. */
4452 static bool
4453 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4455 unsigned i;
4457 if (aff1->n != aff2->n)
4458 return false;
4460 for (i = 0; i < aff1->n; i++)
4462 if (aff1->elts[i].coef != aff2->elts[i].coef)
4463 return false;
4465 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4466 return false;
4468 return true;
4471 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4473 static int
4474 get_expr_id (struct ivopts_data *data, tree expr)
4476 struct iv_inv_expr_ent ent;
4477 struct iv_inv_expr_ent **slot;
4479 ent.expr = expr;
4480 ent.hash = iterative_hash_expr (expr, 0);
4481 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4482 if (*slot)
4483 return (*slot)->id;
4485 *slot = XNEW (struct iv_inv_expr_ent);
4486 (*slot)->expr = expr;
4487 (*slot)->hash = ent.hash;
4488 (*slot)->id = data->inv_expr_id++;
4489 return (*slot)->id;
4492 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4493 requires a new compiler-generated temporary. Returns -1 otherwise.
4494 ADDRESS_P is a flag indicating if the expression is for address
4495 computation. */
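/* E.g., for ubase == &a[i] + 4 and cbase == &a[i] with ratio == 1, the
   bases agree once the constant offset is stripped, so no new temporary
   is needed and -1 is returned.  */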
4497 static int
4498 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4499 tree cbase, HOST_WIDE_INT ratio,
4500 bool address_p)
4502 aff_tree ubase_aff, cbase_aff;
4503 tree expr, ub, cb;
4505 STRIP_NOPS (ubase);
4506 STRIP_NOPS (cbase);
4507 ub = ubase;
4508 cb = cbase;
4510 if ((TREE_CODE (ubase) == INTEGER_CST)
4511 && (TREE_CODE (cbase) == INTEGER_CST))
4512 return -1;
4514 /* Strips the constant part. */
4515 if (TREE_CODE (ubase) == PLUS_EXPR
4516 || TREE_CODE (ubase) == MINUS_EXPR
4517 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4519 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4520 ubase = TREE_OPERAND (ubase, 0);
4523 /* Strips the constant part. */
4524 if (TREE_CODE (cbase) == PLUS_EXPR
4525 || TREE_CODE (cbase) == MINUS_EXPR
4526 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4528 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4529 cbase = TREE_OPERAND (cbase, 0);
4532 if (address_p)
4534 if (((TREE_CODE (ubase) == SSA_NAME)
4535 || (TREE_CODE (ubase) == ADDR_EXPR
4536 && is_gimple_min_invariant (ubase)))
4537 && (TREE_CODE (cbase) == INTEGER_CST))
4538 return -1;
4540 if (((TREE_CODE (cbase) == SSA_NAME)
4541 || (TREE_CODE (cbase) == ADDR_EXPR
4542 && is_gimple_min_invariant (cbase)))
4543 && (TREE_CODE (ubase) == INTEGER_CST))
4544 return -1;
4547 if (ratio == 1)
4549 if (operand_equal_p (ubase, cbase, 0))
4550 return -1;
4552 if (TREE_CODE (ubase) == ADDR_EXPR
4553 && TREE_CODE (cbase) == ADDR_EXPR)
4555 tree usym, csym;
4557 usym = TREE_OPERAND (ubase, 0);
4558 csym = TREE_OPERAND (cbase, 0);
4559 if (TREE_CODE (usym) == ARRAY_REF)
4561 tree ind = TREE_OPERAND (usym, 1);
4562 if (TREE_CODE (ind) == INTEGER_CST
4563 && tree_fits_shwi_p (ind)
4564 && tree_to_shwi (ind) == 0)
4565 usym = TREE_OPERAND (usym, 0);
4567 if (TREE_CODE (csym) == ARRAY_REF)
4569 tree ind = TREE_OPERAND (csym, 1);
4570 if (TREE_CODE (ind) == INTEGER_CST
4571 && tree_fits_shwi_p (ind)
4572 && tree_to_shwi (ind) == 0)
4573 csym = TREE_OPERAND (csym, 0);
4575 if (operand_equal_p (usym, csym, 0))
4576 return -1;
4578 /* Now do a more complex comparison. */
4579 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4580 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4581 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4582 return -1;
4585 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4586 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4588 aff_combination_scale (&cbase_aff, -1 * ratio);
4589 aff_combination_add (&ubase_aff, &cbase_aff);
4590 expr = aff_combination_to_tree (&ubase_aff);
4591 return get_expr_id (data, expr);
4596 /* Determines the cost of the computation by which USE is expressed
4597 from induction variable CAND. If ADDRESS_P is true, we just need
4598 to create an address from it, otherwise we want to get it into a
4599 register. A set of invariants we depend on is stored in
4600 DEPENDS_ON. AT is the statement at which the value is computed.
4601 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4602 addressing is likely. */
4604 static comp_cost
4605 get_computation_cost_at (struct ivopts_data *data,
4606 struct iv_use *use, struct iv_cand *cand,
4607 bool address_p, bitmap *depends_on, gimple *at,
4608 bool *can_autoinc,
4609 int *inv_expr_id)
4611 tree ubase = use->iv->base, ustep = use->iv->step;
4612 tree cbase, cstep;
4613 tree utype = TREE_TYPE (ubase), ctype;
4614 unsigned HOST_WIDE_INT cstepi, offset = 0;
4615 HOST_WIDE_INT ratio, aratio;
4616 bool var_present, symbol_present, stmt_is_after_inc;
4617 comp_cost cost;
4618 widest_int rat;
4619 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4620 machine_mode mem_mode = (address_p
4621 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4622 : VOIDmode);
4624 if (depends_on)
4625 *depends_on = NULL;
4627 /* Only consider real candidates. */
4628 if (!cand->iv)
4629 return infinite_cost;
4631 cbase = cand->iv->base;
4632 cstep = cand->iv->step;
4633 ctype = TREE_TYPE (cbase);
4635 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4637 /* We do not have enough precision to express the values of the use. */
4638 return infinite_cost;
4641 if (address_p
4642 || (use->iv->base_object
4643 && cand->iv->base_object
4644 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4645 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4647 /* Do not try to express the address of an object with a computation
4648 based on the address of a different object. This may cause problems
4649 in rtl level alias analysis (which does not expect this to happen,
4650 as it is illegal in C), and would be unlikely to be useful
4651 anyway. */
4652 if (use->iv->base_object
4653 && cand->iv->base_object
4654 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4655 return infinite_cost;
4658 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4660 /* TODO -- add direct handling of this case. */
4661 goto fallback;
4664 /* CSTEPI is removed from the offset in case the statement is after the
4665 increment. If the step is not constant, we use zero instead.
4666 This is a bit imprecise (there is the extra addition), but
4667 redundancy elimination is likely to transform the code so that
4668 it uses the value of the variable before the increment anyway,
4669 so this is not too unrealistic. */
4670 if (cst_and_fits_in_hwi (cstep))
4671 cstepi = int_cst_value (cstep);
4672 else
4673 cstepi = 0;
4675 if (!constant_multiple_of (ustep, cstep, &rat))
4676 return infinite_cost;
4678 if (wi::fits_shwi_p (rat))
4679 ratio = rat.to_shwi ();
4680 else
4681 return infinite_cost;
4683 STRIP_NOPS (cbase);
4684 ctype = TREE_TYPE (cbase);
4686 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4688 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4689 or ratio == 1, it is better to handle this like
4691 ubase - ratio * cbase + ratio * var
4693 (this also holds in the case ratio == -1, TODO). */
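/* E.g., with ubase == 100, cbase == 4 and ratio == 4, the use
   100 + 4 * (var - 4) is rewritten as (100 - 4 * 4) + 4 * var,
   so the constant part folds into the offset handled below.  */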
4695 if (cst_and_fits_in_hwi (cbase))
4697 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4698 cost = difference_cost (data,
4699 ubase, build_int_cst (utype, 0),
4700 &symbol_present, &var_present, &offset,
4701 depends_on);
4702 cost.cost /= avg_loop_niter (data->current_loop);
4704 else if (ratio == 1)
4706 tree real_cbase = cbase;
4708 /* Check to see if any adjustment is needed. */
4709 if (cstepi == 0 && stmt_is_after_inc)
4711 aff_tree real_cbase_aff;
4712 aff_tree cstep_aff;
4714 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4715 &real_cbase_aff);
4716 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4718 aff_combination_add (&real_cbase_aff, &cstep_aff);
4719 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4722 cost = difference_cost (data,
4723 ubase, real_cbase,
4724 &symbol_present, &var_present, &offset,
4725 depends_on);
4726 cost.cost /= avg_loop_niter (data->current_loop);
4728 else if (address_p
4729 && !POINTER_TYPE_P (ctype)
4730 && multiplier_allowed_in_address_p
4731 (ratio, mem_mode,
4732 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4734 if (cstepi == 0 && stmt_is_after_inc)
4736 if (POINTER_TYPE_P (ctype))
4737 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4738 else
4739 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4741 cbase
4742 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4743 cost = difference_cost (data,
4744 ubase, cbase,
4745 &symbol_present, &var_present, &offset,
4746 depends_on);
4747 cost.cost /= avg_loop_niter (data->current_loop);
4749 else
4751 cost = force_var_cost (data, cbase, depends_on);
4752 cost = add_costs (cost,
4753 difference_cost (data,
4754 ubase, build_int_cst (utype, 0),
4755 &symbol_present, &var_present,
4756 &offset, depends_on));
4757 cost.cost /= avg_loop_niter (data->current_loop);
4758 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4761 /* The set of invariants depended on by a sub-use has already been
4762 computed for the first use in the group. */
4763 if (use->sub_id)
4765 cost.cost = 0;
4766 if (depends_on && *depends_on)
4767 bitmap_clear (*depends_on);
4769 else if (inv_expr_id)
4771 *inv_expr_id =
4772 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4773 /* Clear the depends_on set. */
4774 if (*inv_expr_id != -1 && depends_on && *depends_on)
4775 bitmap_clear (*depends_on);
4778 /* If we are after the increment, the value of the candidate is higher by
4779 one iteration. */
4780 if (stmt_is_after_inc)
4781 offset -= ratio * cstepi;
4783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4784 (symbol/var1/const parts may be omitted). If we are looking for an
4785 address, find the cost of addressing this. */
4786 if (address_p)
4787 return add_costs (cost,
4788 get_address_cost (symbol_present, var_present,
4789 offset, ratio, cstepi,
4790 mem_mode,
4791 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4792 speed, stmt_is_after_inc,
4793 can_autoinc));
4795 /* Otherwise estimate the costs for computing the expression. */
4796 if (!symbol_present && !var_present && !offset)
4798 if (ratio != 1)
4799 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4800 return cost;
4803 /* Symbol + offset should be compile-time computable, so assume they
4804 are added to the variable once, if present. */
4805 if (var_present && (symbol_present || offset))
4806 cost.cost += adjust_setup_cost (data,
4807 add_cost (speed, TYPE_MODE (ctype)));
4809 /* Having an offset does not affect the runtime cost if it is added to
4810 the symbol, but it does increase complexity. */
4811 if (offset)
4812 cost.complexity++;
4814 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4816 aratio = ratio > 0 ? ratio : -ratio;
4817 if (aratio != 1)
4818 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4819 return cost;
4821 fallback:
4822 if (can_autoinc)
4823 *can_autoinc = false;
4826 /* Just get the expression, expand it and measure the cost. */
4827 tree comp = get_computation_at (data->current_loop, use, cand, at);
4829 if (!comp)
4830 return infinite_cost;
4832 if (address_p)
4833 comp = build_simple_mem_ref (comp);
4835 return new_cost (computation_cost (comp, speed), 0);
4839 /* Determines the cost of the computation by which USE is expressed
4840 from induction variable CAND. If ADDRESS_P is true, we just need
4841 to create an address from it, otherwise we want to get it into a
4842 register. A set of invariants we depend on is stored in
4843 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4844 autoinc addressing is likely. */
4846 static comp_cost
4847 get_computation_cost (struct ivopts_data *data,
4848 struct iv_use *use, struct iv_cand *cand,
4849 bool address_p, bitmap *depends_on,
4850 bool *can_autoinc, int *inv_expr_id)
4852 return get_computation_cost_at (data,
4853 use, cand, address_p, depends_on, use->stmt,
4854 can_autoinc, inv_expr_id);
4857 /* Determines cost of basing replacement of USE on CAND in a generic
4858 expression. */
4860 static bool
4861 determine_use_iv_cost_generic (struct ivopts_data *data,
4862 struct iv_use *use, struct iv_cand *cand)
4864 bitmap depends_on;
4865 comp_cost cost;
4866 int inv_expr_id = -1;
4868 /* The simple case first -- if we need to express the value of the preserved
4869 original biv, the cost is 0. This also prevents us from counting the
4870 cost of increment twice -- once at this use and once in the cost of
4871 the candidate. */
4872 if (cand->pos == IP_ORIGINAL
4873 && cand->incremented_at == use->stmt)
4875 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4876 ERROR_MARK, -1);
4877 return true;
4880 cost = get_computation_cost (data, use, cand, false, &depends_on,
4881 NULL, &inv_expr_id);
4883 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4884 inv_expr_id);
4886 return !infinite_cost_p (cost);
4889 /* Determines cost of basing replacement of USE on CAND in an address. */
4891 static bool
4892 determine_use_iv_cost_address (struct ivopts_data *data,
4893 struct iv_use *use, struct iv_cand *cand)
4895 bitmap depends_on;
4896 bool can_autoinc;
4897 int inv_expr_id = -1;
4898 struct iv_use *sub_use;
4899 comp_cost sub_cost;
4900 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4901 &can_autoinc, &inv_expr_id);
4903 if (cand->ainc_use == use)
4905 if (can_autoinc)
4906 cost.cost -= cand->cost_step;
4907 /* If we generated the candidate solely for exploiting autoincrement
4908 opportunities, and it turns out it can't be used, set the cost to
4909 infinity to make sure we ignore it. */
4910 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4911 cost = infinite_cost;
4913 for (sub_use = use->next;
4914 sub_use && !infinite_cost_p (cost);
4915 sub_use = sub_use->next)
4917 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
4918 &can_autoinc, NULL);
4919 cost = add_costs (cost, sub_cost);
4922 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4923 inv_expr_id);
4925 return !infinite_cost_p (cost);
4928 /* Computes value of candidate CAND at position AT in iteration NITER, and
4929 stores it to VAL. */
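/* That is, VAL = BASE + NITER * STEP, plus one extra STEP when AT is
   after the increment of CAND; the arithmetic is carried out in an
   unsigned type.  */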
4931 static void
4932 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4933 aff_tree *val)
4935 aff_tree step, delta, nit;
4936 struct iv *iv = cand->iv;
4937 tree type = TREE_TYPE (iv->base);
4938 /* Compute the value in an unsigned type, so that the arithmetic wraps
4939 rather than invoking undefined overflow. */
4941 tree steptype = unsigned_type_for (type);
4943 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4944 aff_combination_convert (&step, steptype);
4945 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4946 aff_combination_convert (&nit, steptype);
4947 aff_combination_mult (&nit, &step, &delta);
4948 if (stmt_after_increment (loop, cand, at))
4949 aff_combination_add (&delta, &step);
4951 tree_to_aff_combination (iv->base, type, val);
4952 if (!POINTER_TYPE_P (type))
4953 aff_combination_convert (val, steptype);
4954 aff_combination_add (val, &delta);
4957 /* Returns the period of induction variable IV. */
4959 static tree
4960 iv_period (struct iv *iv)
4962 tree step = iv->step, period, type;
4963 tree pow2div;
4965 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4967 type = unsigned_type_for (TREE_TYPE (step));
4968 /* The period of the iv is lcm (step, type_range)/step - 1,
4969 i.e., N*type_range/step - 1. Since the type range is a power
4970 of two, N == step >> num_of_ending_zeros_binary (step),
4971 so the final result is
4973 (type_range >> num_of_ending_zeros_binary (step)) - 1
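For example, a 32-bit unsigned iv with step 12 has two trailing
zero bits in its step, so its period is
(0xffffffff >> 2) == 0x3fffffff iterations. */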
4976 pow2div = num_ending_zeros (step);
4978 period = build_low_bits_mask (type,
4979 (TYPE_PRECISION (type)
4980 - tree_to_uhwi (pow2div)));
4982 return period;
4985 /* Returns the comparison operator used when eliminating the iv USE. */
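/* E.g., if the TRUE edge of the exit condition leaves the loop, the
   eliminated test becomes CAND == BOUND; if the FALSE edge leaves it,
   CAND != BOUND is used instead.  */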
4987 static enum tree_code
4988 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4990 struct loop *loop = data->current_loop;
4991 basic_block ex_bb;
4992 edge exit;
4994 ex_bb = gimple_bb (use->stmt);
4995 exit = EDGE_SUCC (ex_bb, 0);
4996 if (flow_bb_inside_loop_p (loop, exit->dest))
4997 exit = EDGE_SUCC (ex_bb, 1);
4999 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5002 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5003 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5004 calculation is performed in non-wrapping type.
5006 TODO: More generally, we could test for the situation that
5007 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5008 This would require knowing the sign of OFFSET. */
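/* E.g., for BASE == p + n and OFFSET == n, the affine expansions of n
   and OFFSET cancel, proving that BASE - OFFSET == p cannot
   overflow.  */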
5010 static bool
5011 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5013 enum tree_code code;
5014 tree e1, e2;
5015 aff_tree aff_e1, aff_e2, aff_offset;
5017 if (!nowrap_type_p (TREE_TYPE (base)))
5018 return false;
5020 base = expand_simple_operations (base);
5022 if (TREE_CODE (base) == SSA_NAME)
5024 gimple *stmt = SSA_NAME_DEF_STMT (base);
5026 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5027 return false;
5029 code = gimple_assign_rhs_code (stmt);
5030 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5031 return false;
5033 e1 = gimple_assign_rhs1 (stmt);
5034 e2 = gimple_assign_rhs2 (stmt);
5036 else
5038 code = TREE_CODE (base);
5039 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5040 return false;
5041 e1 = TREE_OPERAND (base, 0);
5042 e2 = TREE_OPERAND (base, 1);
5045 /* Use affine expansion as a deeper inspection to prove the equality. */
5046 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5047 &aff_e2, &data->name_expansion_cache);
5048 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5049 &aff_offset, &data->name_expansion_cache);
5050 aff_combination_scale (&aff_offset, -1);
5051 switch (code)
5053 case PLUS_EXPR:
5054 aff_combination_add (&aff_e2, &aff_offset);
5055 if (aff_combination_zero_p (&aff_e2))
5056 return true;
5058 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5059 &aff_e1, &data->name_expansion_cache);
5060 aff_combination_add (&aff_e1, &aff_offset);
5061 return aff_combination_zero_p (&aff_e1);
5063 case POINTER_PLUS_EXPR:
5064 aff_combination_add (&aff_e2, &aff_offset);
5065 return aff_combination_zero_p (&aff_e2);
5067 default:
5068 return false;
5072 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
5073 comparison with CAND. NITER describes the number of iterations of
5074 the loop. If successful, the comparison in COMP_P is altered accordingly.
5076 We aim to handle the following situation:
5078 sometype *base, *p;
5079 int a, b, i;
5081 i = a;
5082 p = p_0 = base + a;
5086 bla (*p);
5087 p++;
5088 i++;
5090 while (i < b);
5092 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5093 We aim to optimize this to
5095 p = p_0 = base + a;
5098 bla (*p);
5099 p++;
5101 while (p < p_0 - a + b);
5103 This preserves correctness, since the pointer arithmetic does not
5104 overflow. More precisely:
5106 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5107 overflow in computing it or the values of p.
5108 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5109 overflow. To prove this, we use the fact that p_0 = base + a. */
5111 static bool
5112 iv_elimination_compare_lt (struct ivopts_data *data,
5113 struct iv_cand *cand, enum tree_code *comp_p,
5114 struct tree_niter_desc *niter)
5116 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5117 struct aff_tree nit, tmpa, tmpb;
5118 enum tree_code comp;
5119 HOST_WIDE_INT step;
5121 /* We need to know that the candidate induction variable does not overflow.
5122 While more complex analysis may be used to prove this, for now just
5123 check that the variable appears in the original program and that it
5124 is computed in a type that guarantees no overflows. */
5125 cand_type = TREE_TYPE (cand->iv->base);
5126 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5127 return false;
5129 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5130 the calculation of the BOUND could overflow, making the comparison
5131 invalid. */
5132 if (!data->loop_single_exit_p)
5133 return false;
5135 /* We need to be able to decide whether candidate is increasing or decreasing
5136 in order to choose the right comparison operator. */
5137 if (!cst_and_fits_in_hwi (cand->iv->step))
5138 return false;
5139 step = int_cst_value (cand->iv->step);
5141 /* Check that the number of iterations matches the expected pattern:
5142 a + 1 > b ? 0 : b - a - 1. */
5143 mbz = niter->may_be_zero;
5144 if (TREE_CODE (mbz) == GT_EXPR)
5146 /* Handle a + 1 > b. */
5147 tree op0 = TREE_OPERAND (mbz, 0);
5148 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5150 a = TREE_OPERAND (op0, 0);
5151 b = TREE_OPERAND (mbz, 1);
5153 else
5154 return false;
5156 else if (TREE_CODE (mbz) == LT_EXPR)
5158 tree op1 = TREE_OPERAND (mbz, 1);
5160 /* Handle b < a + 1. */
5161 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5163 a = TREE_OPERAND (op1, 0);
5164 b = TREE_OPERAND (mbz, 0);
5166 else
5167 return false;
5169 else
5170 return false;
5172 /* Expected number of iterations is B - A - 1. Check that it matches
5173 the actual number, i.e., that B - A - NITER = 1. */
5174 tree_to_aff_combination (niter->niter, nit_type, &nit);
5175 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5176 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5177 aff_combination_scale (&nit, -1);
5178 aff_combination_scale (&tmpa, -1);
5179 aff_combination_add (&tmpb, &tmpa);
5180 aff_combination_add (&tmpb, &nit);
5181 if (tmpb.n != 0 || tmpb.offset != 1)
5182 return false;
5184 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5185 overflow. */
5186 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5187 cand->iv->step,
5188 fold_convert (TREE_TYPE (cand->iv->step), a));
5189 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5190 return false;
5192 /* Determine the new comparison operator. */
5193 comp = step < 0 ? GT_EXPR : LT_EXPR;
5194 if (*comp_p == NE_EXPR)
5195 *comp_p = comp;
5196 else if (*comp_p == EQ_EXPR)
5197 *comp_p = invert_tree_comparison (comp, false);
5198 else
5199 gcc_unreachable ();
5201 return true;
5204 /* Check whether it is possible to express the condition in USE by comparison
5205 of candidate CAND. If so, store the value compared with to BOUND, and the
5206 comparison operator to COMP. */
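/* For example, an 8-bit candidate with step 1 has period 255, so it can
   replace the exit test only when the loop is known to iterate at most
   255 times (one fewer if the use is after the increment).  */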
5208 static bool
5209 may_eliminate_iv (struct ivopts_data *data,
5210 struct iv_use *use, struct iv_cand *cand, tree *bound,
5211 enum tree_code *comp)
5213 basic_block ex_bb;
5214 edge exit;
5215 tree period;
5216 struct loop *loop = data->current_loop;
5217 aff_tree bnd;
5218 struct tree_niter_desc *desc = NULL;
5220 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5221 return false;
5223 /* For now this works only for exits that dominate the loop latch.
5224 TODO: extend to other conditions inside the loop body. */
5225 ex_bb = gimple_bb (use->stmt);
5226 if (use->stmt != last_stmt (ex_bb)
5227 || gimple_code (use->stmt) != GIMPLE_COND
5228 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5229 return false;
5231 exit = EDGE_SUCC (ex_bb, 0);
5232 if (flow_bb_inside_loop_p (loop, exit->dest))
5233 exit = EDGE_SUCC (ex_bb, 1);
5234 if (flow_bb_inside_loop_p (loop, exit->dest))
5235 return false;
5237 desc = niter_for_exit (data, exit);
5238 if (!desc)
5239 return false;
5241 /* Determine whether we can use the variable to test the exit condition.
5242 This is the case iff the period of the induction variable is greater
5243 than the number of iterations for which the exit condition is true. */
5244 period = iv_period (cand->iv);
5246 /* If the number of iterations is constant, compare against it directly. */
5247 if (TREE_CODE (desc->niter) == INTEGER_CST)
5249 /* See cand_value_at. */
5250 if (stmt_after_increment (loop, cand, use->stmt))
5252 if (!tree_int_cst_lt (desc->niter, period))
5253 return false;
5255 else
5257 if (tree_int_cst_lt (period, desc->niter))
5258 return false;
5262 /* If not, and if this is the only possible exit of the loop, see whether
5263 we can get a conservative estimate on the number of iterations of the
5264 entire loop and compare against that instead. */
5265 else
5267 widest_int period_value, max_niter;
5269 max_niter = desc->max;
5270 if (stmt_after_increment (loop, cand, use->stmt))
5271 max_niter += 1;
5272 period_value = wi::to_widest (period);
5273 if (wi::gtu_p (max_niter, period_value))
5275 /* See if we can take advantage of inferred loop bound information. */
5276 if (data->loop_single_exit_p)
5278 if (!max_loop_iterations (loop, &max_niter))
5279 return false;
5280 /* The loop bound is already adjusted by adding 1. */
5281 if (wi::gtu_p (max_niter, period_value))
5282 return false;
5284 else
5285 return false;
5289 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5291 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5292 aff_combination_to_tree (&bnd));
5293 *comp = iv_elimination_compare (data, use);
5295 /* It is unlikely that computing the number of iterations using division
5296 would be more profitable than keeping the original induction variable. */
5297 if (expression_expensive_p (*bound))
5298 return false;
5300 /* Sometimes, it is possible to handle the situation that the number of
5301 iterations may be zero unless additional assumptions hold, by using <
5302 instead of != in the exit condition.
5304 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5305 base the exit condition on it. However, that is often too
5306 expensive. */
5307 if (!integer_zerop (desc->may_be_zero))
5308 return iv_elimination_compare_lt (data, cand, comp, desc);
5310 return true;
5313 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5314 be copied if it is used in the loop body and DATA->body_includes_call. */
5316 static int
5317 parm_decl_cost (struct ivopts_data *data, tree bound)
5319 tree sbound = bound;
5320 STRIP_NOPS (sbound);
5322 if (TREE_CODE (sbound) == SSA_NAME
5323 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5324 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5325 && data->body_includes_call)
5326 return COSTS_N_INSNS (1);
5328 return 0;
5331 /* Determines cost of basing replacement of USE on CAND in a condition. */
5333 static bool
5334 determine_use_iv_cost_condition (struct ivopts_data *data,
5335 struct iv_use *use, struct iv_cand *cand)
5337 tree bound = NULL_TREE;
5338 struct iv *cmp_iv;
5339 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5340 comp_cost elim_cost, express_cost, cost, bound_cost;
5341 bool ok;
5342 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5343 tree *control_var, *bound_cst;
5344 enum tree_code comp = ERROR_MARK;
5346 /* Only consider real candidates. */
5347 if (!cand->iv)
5349 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5350 ERROR_MARK, -1);
5351 return false;
5354 /* Try iv elimination. */
5355 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5357 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5358 if (elim_cost.cost == 0)
5359 elim_cost.cost = parm_decl_cost (data, bound);
5360 else if (TREE_CODE (bound) == INTEGER_CST)
5361 elim_cost.cost = 0;
5362 /* If we replace a loop condition 'i < n' with 'p < base + n',
5363 depends_on_elim will have 'base' and 'n' set, which implies
5364 that both 'base' and 'n' will be live during the loop. More likely,
5365 'base + n' will be loop invariant, resulting in only one live value
5366 during the loop. So in that case we clear depends_on_elim and set
5367 elim_inv_expr_id instead. */
5368 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5370 elim_inv_expr_id = get_expr_id (data, bound);
5371 bitmap_clear (depends_on_elim);
5373 /* The bound is a loop invariant, so it will only be computed
5374 once. */
5375 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5377 else
5378 elim_cost = infinite_cost;
5380 /* Try expressing the original giv. If it is compared with an invariant,
5381 note that we cannot get rid of it. */
5382 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5383 NULL, &cmp_iv);
5384 gcc_assert (ok);
5386 /* When the condition is a comparison of the candidate IV against
5387 zero, prefer this IV.
5389 TODO: The constant that we're subtracting from the cost should
5390 be target-dependent. This information should be added to the
5391 target costs for each backend. */
5392 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5393 && integer_zerop (*bound_cst)
5394 && (operand_equal_p (*control_var, cand->var_after, 0)
5395 || operand_equal_p (*control_var, cand->var_before, 0)))
5396 elim_cost.cost -= 1;
5398 express_cost = get_computation_cost (data, use, cand, false,
5399 &depends_on_express, NULL,
5400 &express_inv_expr_id);
5401 fd_ivopts_data = data;
5402 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5404 /* Count the cost of the original bound as well. */
5405 bound_cost = force_var_cost (data, *bound_cst, NULL);
5406 if (bound_cost.cost == 0)
5407 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5408 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5409 bound_cost.cost = 0;
5410 express_cost.cost += bound_cost.cost;
5412 /* Choose the better approach, preferring the eliminated IV. */
5413 if (compare_costs (elim_cost, express_cost) <= 0)
5415 cost = elim_cost;
5416 depends_on = depends_on_elim;
5417 depends_on_elim = NULL;
5418 inv_expr_id = elim_inv_expr_id;
5420 else
5422 cost = express_cost;
5423 depends_on = depends_on_express;
5424 depends_on_express = NULL;
5425 bound = NULL_TREE;
5426 comp = ERROR_MARK;
5427 inv_expr_id = express_inv_expr_id;
5430 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5432 if (depends_on_elim)
5433 BITMAP_FREE (depends_on_elim);
5434 if (depends_on_express)
5435 BITMAP_FREE (depends_on_express);
5437 return !infinite_cost_p (cost);
5440 /* Determines cost of basing replacement of USE on CAND. Returns false
5441 if USE cannot be based on CAND. */
5443 static bool
5444 determine_use_iv_cost (struct ivopts_data *data,
5445 struct iv_use *use, struct iv_cand *cand)
5447 switch (use->type)
5449 case USE_NONLINEAR_EXPR:
5450 return determine_use_iv_cost_generic (data, use, cand);
5452 case USE_ADDRESS:
5453 return determine_use_iv_cost_address (data, use, cand);
5455 case USE_COMPARE:
5456 return determine_use_iv_cost_condition (data, use, cand);
5458 default:
5459 gcc_unreachable ();
5463 /* Return true if get_computation_cost indicates that autoincrement is
5464 a possibility for the pair of USE and CAND, false otherwise. */
5466 static bool
5467 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5468 struct iv_cand *cand)
5470 bitmap depends_on;
5471 bool can_autoinc;
5472 comp_cost cost;
5474 if (use->type != USE_ADDRESS)
5475 return false;
5477 cost = get_computation_cost (data, use, cand, true, &depends_on,
5478 &can_autoinc, NULL);
5480 BITMAP_FREE (depends_on);
5482 return !infinite_cost_p (cost) && can_autoinc;
5485 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5486 use that allows autoincrement, and set their AINC_USE if possible. */
5488 static void
5489 set_autoinc_for_original_candidates (struct ivopts_data *data)
5491 unsigned i, j;
5493 for (i = 0; i < n_iv_cands (data); i++)
5495 struct iv_cand *cand = iv_cand (data, i);
5496 struct iv_use *closest_before = NULL;
5497 struct iv_use *closest_after = NULL;
5498 if (cand->pos != IP_ORIGINAL)
5499 continue;
5501 for (j = 0; j < n_iv_uses (data); j++)
5503 struct iv_use *use = iv_use (data, j);
5504 unsigned uid = gimple_uid (use->stmt);
5506 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5507 continue;
5509 if (uid < gimple_uid (cand->incremented_at)
5510 && (closest_before == NULL
5511 || uid > gimple_uid (closest_before->stmt)))
5512 closest_before = use;
5514 if (uid > gimple_uid (cand->incremented_at)
5515 && (closest_after == NULL
5516 || uid < gimple_uid (closest_after->stmt)))
5517 closest_after = use;
5520 if (closest_before != NULL
5521 && autoinc_possible_for_pair (data, closest_before, cand))
5522 cand->ainc_use = closest_before;
5523 else if (closest_after != NULL
5524 && autoinc_possible_for_pair (data, closest_after, cand))
5525 cand->ainc_use = closest_after;
5529 /* Finds the candidates for the induction variables. */
5531 static void
5532 find_iv_candidates (struct ivopts_data *data)
5534 /* Add commonly used ivs. */
5535 add_standard_iv_candidates (data);
5537 /* Add old induction variables. */
5538 add_iv_candidate_for_bivs (data);
5540 /* Add induction variables derived from uses. */
5541 add_iv_candidate_for_uses (data);
5543 set_autoinc_for_original_candidates (data);
5545 /* Record the important candidates. */
5546 record_important_candidates (data);
5549 /* Determines costs of basing the use of the iv on an iv candidate. */
5551 static void
5552 determine_use_iv_costs (struct ivopts_data *data)
5554 unsigned i, j;
5555 struct iv_use *use;
5556 struct iv_cand *cand;
5557 bitmap to_clear = BITMAP_ALLOC (NULL);
5559 alloc_use_cost_map (data);
5561 for (i = 0; i < n_iv_uses (data); i++)
5563 use = iv_use (data, i);
5565 if (data->consider_all_candidates)
5567 for (j = 0; j < n_iv_cands (data); j++)
5569 cand = iv_cand (data, j);
5570 determine_use_iv_cost (data, use, cand);
5573 else
5575 bitmap_iterator bi;
5577 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5579 cand = iv_cand (data, j);
5580 if (!determine_use_iv_cost (data, use, cand))
5581 bitmap_set_bit (to_clear, j);
5584 /* Remove the candidates for which the cost is infinite from
5585 the list of related candidates. */
5586 bitmap_and_compl_into (use->related_cands, to_clear);
5587 bitmap_clear (to_clear);
5591 BITMAP_FREE (to_clear);
5593 if (dump_file && (dump_flags & TDF_DETAILS))
5595 fprintf (dump_file, "Use-candidate costs:\n");
5597 for (i = 0; i < n_iv_uses (data); i++)
5599 use = iv_use (data, i);
5601 fprintf (dump_file, "Use %d:\n", i);
5602 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5603 for (j = 0; j < use->n_map_members; j++)
5605 if (!use->cost_map[j].cand
5606 || infinite_cost_p (use->cost_map[j].cost))
5607 continue;
5609 fprintf (dump_file, " %d\t%d\t%d\t",
5610 use->cost_map[j].cand->id,
5611 use->cost_map[j].cost.cost,
5612 use->cost_map[j].cost.complexity);
5613 if (use->cost_map[j].depends_on)
5614 bitmap_print (dump_file,
5615 use->cost_map[j].depends_on, "","");
5616 if (use->cost_map[j].inv_expr_id != -1)
5617 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5618 fprintf (dump_file, "\n");
5621 fprintf (dump_file, "\n");
5623 fprintf (dump_file, "\n");
5627 /* Determines cost of the candidate CAND. */
5629 static void
5630 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5632 comp_cost cost_base;
5633 unsigned cost, cost_step;
5634 tree base;
5636 if (!cand->iv)
5638 cand->cost = 0;
5639 return;
5642 /* There are two costs associated with the candidate -- its increment
5643 and its initialization. The second is almost negligible for any loop
5644 that rolls enough, so we take it into account only very slightly. */
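/* E.g., "i = base" executes once while "i += step" executes on every
   iteration, which is why adjust_setup_cost is applied to the
   initialization cost but not to cost_step below.  */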
5646 base = cand->iv->base;
5647 cost_base = force_var_cost (data, base, NULL);
5648 /* It is exceptional for the iv register to happen to be initialized with
5649 the proper value at no cost. In general, there will be at least a regcopy
5650 or a constant set. */
5651 if (cost_base.cost == 0)
5652 cost_base.cost = COSTS_N_INSNS (1);
5653 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5655 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5657 /* Prefer the original ivs unless we may gain something by replacing them.
5658 The reason is to make debugging simpler; so this is not relevant for
5659 artificial ivs created by other optimization passes. */
5660 if (cand->pos != IP_ORIGINAL
5661 || !SSA_NAME_VAR (cand->var_before)
5662 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5663 cost++;
5665 /* Prefer not to insert statements into the latch unless there are some
5666 already (so that we do not create unnecessary jumps). */
5667 if (cand->pos == IP_END
5668 && empty_block_p (ip_end_pos (data->current_loop)))
5669 cost++;
5671 cand->cost = cost;
5672 cand->cost_step = cost_step;
5675 /* Determines costs of computation of the candidates. */
5677 static void
5678 determine_iv_costs (struct ivopts_data *data)
5680 unsigned i;
5682 if (dump_file && (dump_flags & TDF_DETAILS))
5684 fprintf (dump_file, "Candidate costs:\n");
5685 fprintf (dump_file, " cand\tcost\n");
5688 for (i = 0; i < n_iv_cands (data); i++)
5690 struct iv_cand *cand = iv_cand (data, i);
5692 determine_iv_cost (data, cand);
5694 if (dump_file && (dump_flags & TDF_DETAILS))
5695 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5698 if (dump_file && (dump_flags & TDF_DETAILS))
5699 fprintf (dump_file, "\n");
5702 /* Calculates cost for having SIZE induction variables. */
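/* The returned value is SIZE plus estimate_reg_pressure_cost's estimate,
   which rises once SIZE + DATA->regs_used no longer fit in the available
   registers and spilling starts; adding SIZE itself biases the search
   toward smaller iv sets.  */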
5704 static unsigned
5705 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5707 /* We add size to the cost, so that we prefer eliminating ivs
5708 if possible. */
5709 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5710 data->body_includes_call);
5713 /* For each size of the induction variable set determine the penalty. */
5715 static void
5716 determine_set_costs (struct ivopts_data *data)
5718 unsigned j, n;
5719 gphi *phi;
5720 gphi_iterator psi;
5721 tree op;
5722 struct loop *loop = data->current_loop;
5723 bitmap_iterator bi;
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5727 fprintf (dump_file, "Global costs:\n");
5728 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5729 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5730 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5731 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5734 n = 0;
5735 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5737 phi = psi.phi ();
5738 op = PHI_RESULT (phi);
5740 if (virtual_operand_p (op))
5741 continue;
5743 if (get_iv (data, op))
5744 continue;
5746 n++;
5749 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5751 struct version_info *info = ver_info (data, j);
5753 if (info->inv_id && info->has_nonlin_use)
5754 n++;
5757 data->regs_used = n;
5758 if (dump_file && (dump_flags & TDF_DETAILS))
5759 fprintf (dump_file, " regs_used %d\n", n);
5761 if (dump_file && (dump_flags & TDF_DETAILS))
5763 fprintf (dump_file, " cost for size:\n");
5764 fprintf (dump_file, " ivs\tcost\n");
5765 for (j = 0; j <= 2 * target_avail_regs; j++)
5766 fprintf (dump_file, " %d\t%d\n", j,
5767 ivopts_global_cost_for_size (data, j));
5768 fprintf (dump_file, "\n");
5772 /* Returns true if A is a cheaper cost pair than B. */
5774 static bool
5775 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5777 int cmp;
5779 if (!a)
5780 return false;
5782 if (!b)
5783 return true;
5785 cmp = compare_costs (a->cost, b->cost);
5786 if (cmp < 0)
5787 return true;
5789 if (cmp > 0)
5790 return false;
5792 /* In case the costs are the same, prefer the cheaper candidate. */
5793 if (a->cand->cost < b->cand->cost)
5794 return true;
5796 return false;
5800 /* Returns the candidate by which USE is expressed in IVS. */
5802 static struct cost_pair *
5803 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5805 return ivs->cand_for_use[use->id];
5808 /* Computes the cost field of IVS structure. */
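/* I.e., cost = cand_use_cost + cand_cost
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr).  */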
5810 static void
5811 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5813 comp_cost cost = ivs->cand_use_cost;
5815 cost.cost += ivs->cand_cost;
5817 cost.cost += ivopts_global_cost_for_size (data,
5818 ivs->n_regs + ivs->num_used_inv_expr);
5820 ivs->cost = cost;
5823 /* Remove invariants in set INVS from set IVS. */
5825 static void
5826 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5828 bitmap_iterator bi;
5829 unsigned iid;
5831 if (!invs)
5832 return;
5834 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5836 ivs->n_invariant_uses[iid]--;
5837 if (ivs->n_invariant_uses[iid] == 0)
5838 ivs->n_regs--;
5842 /* Set USE not to be expressed by any candidate in IVS. */
5844 static void
5845 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5846 struct iv_use *use)
5848 unsigned uid = use->id, cid;
5849 struct cost_pair *cp;
5851 cp = ivs->cand_for_use[uid];
5852 if (!cp)
5853 return;
5854 cid = cp->cand->id;
5856 ivs->bad_uses++;
5857 ivs->cand_for_use[uid] = NULL;
5858 ivs->n_cand_uses[cid]--;
5860 if (ivs->n_cand_uses[cid] == 0)
5862 bitmap_clear_bit (ivs->cands, cid);
5863 /* Do not count the pseudocandidates. */
5864 if (cp->cand->iv)
5865 ivs->n_regs--;
5866 ivs->n_cands--;
5867 ivs->cand_cost -= cp->cand->cost;
5869 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5872 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5874 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5876 if (cp->inv_expr_id != -1)
5878 ivs->used_inv_expr[cp->inv_expr_id]--;
5879 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5880 ivs->num_used_inv_expr--;
5882 iv_ca_recount_cost (data, ivs);
5885 /* Add invariants in set INVS to set IVS. */
5887 static void
5888 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5890 bitmap_iterator bi;
5891 unsigned iid;
5893 if (!invs)
5894 return;
5896 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5898 ivs->n_invariant_uses[iid]++;
5899 if (ivs->n_invariant_uses[iid] == 1)
5900 ivs->n_regs++;
5904 /* Set cost pair for USE in set IVS to CP. */
5906 static void
5907 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5908 struct iv_use *use, struct cost_pair *cp)
5910 unsigned uid = use->id, cid;
5912 if (ivs->cand_for_use[uid] == cp)
5913 return;
5915 if (ivs->cand_for_use[uid])
5916 iv_ca_set_no_cp (data, ivs, use);
5918 if (cp)
5920 cid = cp->cand->id;
5922 ivs->bad_uses--;
5923 ivs->cand_for_use[uid] = cp;
5924 ivs->n_cand_uses[cid]++;
5925 if (ivs->n_cand_uses[cid] == 1)
5927 bitmap_set_bit (ivs->cands, cid);
5928 /* Do not count the pseudocandidates. */
5929 if (cp->cand->iv)
5930 ivs->n_regs++;
5931 ivs->n_cands++;
5932 ivs->cand_cost += cp->cand->cost;
5934 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5937 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5938 iv_ca_set_add_invariants (ivs, cp->depends_on);
5940 if (cp->inv_expr_id != -1)
5942 ivs->used_inv_expr[cp->inv_expr_id]++;
5943 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5944 ivs->num_used_inv_expr++;
5946 iv_ca_recount_cost (data, ivs);
5950 /* Extend set IVS by expressing USE by some of the candidates in it
5951 if possible. Consider all important candidates if candidates in
5952 set IVS don't give any result. */
5954 static void
5955 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5956 struct iv_use *use)
5958 struct cost_pair *best_cp = NULL, *cp;
5959 bitmap_iterator bi;
5960 unsigned i;
5961 struct iv_cand *cand;
5963 gcc_assert (ivs->upto >= use->id);
5964 ivs->upto++;
5965 ivs->bad_uses++;
5967 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5969 cand = iv_cand (data, i);
5970 cp = get_use_iv_cost (data, use, cand);
5971 if (cheaper_cost_pair (cp, best_cp))
5972 best_cp = cp;
5975 if (best_cp == NULL)
5977 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5979 cand = iv_cand (data, i);
5980 cp = get_use_iv_cost (data, use, cand);
5981 if (cheaper_cost_pair (cp, best_cp))
5982 best_cp = cp;
5986 iv_ca_set_cp (data, ivs, use, best_cp);
5989 /* Get cost for assignment IVS. */
5991 static comp_cost
5992 iv_ca_cost (struct iv_ca *ivs)
5994 /* This was a conditional expression but it triggered a bug in
5995 Sun C 5.5. */
5996 if (ivs->bad_uses)
5997 return infinite_cost;
5998 else
5999 return ivs->cost;
6002 /* Returns true if all dependences of CP are among invariants in IVS. */
6004 static bool
6005 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6007 unsigned i;
6008 bitmap_iterator bi;
6010 if (!cp->depends_on)
6011 return true;
6013 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6015 if (ivs->n_invariant_uses[i] == 0)
6016 return false;
6019 return true;
6022 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
6023 it before NEXT_CHANGE. */
6025 static struct iv_ca_delta *
6026 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6027 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6029 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6031 change->use = use;
6032 change->old_cp = old_cp;
6033 change->new_cp = new_cp;
6034 change->next_change = next_change;
6036 return change;
6039 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6040 are rewritten. */
6042 static struct iv_ca_delta *
6043 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6045 struct iv_ca_delta *last;
6047 if (!l2)
6048 return l1;
6050 if (!l1)
6051 return l2;
6053 for (last = l1; last->next_change; last = last->next_change)
6054 continue;
6055 last->next_change = l2;
6057 return l1;
6060 /* Reverse the list of changes DELTA, forming the inverse to it. */
6062 static struct iv_ca_delta *
6063 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6065 struct iv_ca_delta *act, *next, *prev = NULL;
6067 for (act = delta; act; act = next)
6069 next = act->next_change;
6070 act->next_change = prev;
6071 prev = act;
6073 std::swap (act->old_cp, act->new_cp);
6076 return prev;
6079 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6080 reverted instead. */
6082 static void
6083 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6084 struct iv_ca_delta *delta, bool forward)
6086 struct cost_pair *from, *to;
6087 struct iv_ca_delta *act;
6089 if (!forward)
6090 delta = iv_ca_delta_reverse (delta);
6092 for (act = delta; act; act = act->next_change)
6094 from = act->old_cp;
6095 to = act->new_cp;
6096 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6097 iv_ca_set_cp (data, ivs, act->use, to);
6100 if (!forward)
6101 iv_ca_delta_reverse (delta);
6104 /* Returns true if CAND is used in IVS. */
6106 static bool
6107 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6109 return ivs->n_cand_uses[cand->id] > 0;
6112 /* Returns number of induction variable candidates in the set IVS. */
6114 static unsigned
6115 iv_ca_n_cands (struct iv_ca *ivs)
6117 return ivs->n_cands;
6120 /* Free the list of changes DELTA. */
6122 static void
6123 iv_ca_delta_free (struct iv_ca_delta **delta)
6125 struct iv_ca_delta *act, *next;
6127 for (act = *delta; act; act = next)
6129 next = act->next_change;
6130 free (act);
6133 *delta = NULL;
6136 /* Allocates a new iv candidate assignment. */
6138 static struct iv_ca *
6139 iv_ca_new (struct ivopts_data *data)
6141 struct iv_ca *nw = XNEW (struct iv_ca);
6143 nw->upto = 0;
6144 nw->bad_uses = 0;
6145 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6146 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6147 nw->cands = BITMAP_ALLOC (NULL);
6148 nw->n_cands = 0;
6149 nw->n_regs = 0;
6150 nw->cand_use_cost = no_cost;
6151 nw->cand_cost = 0;
6152 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6153 nw->cost = no_cost;
6154 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6155 nw->num_used_inv_expr = 0;
6157 return nw;
6160 /* Free memory occupied by the set IVS. */
6162 static void
6163 iv_ca_free (struct iv_ca **ivs)
6165 free ((*ivs)->cand_for_use);
6166 free ((*ivs)->n_cand_uses);
6167 BITMAP_FREE ((*ivs)->cands);
6168 free ((*ivs)->n_invariant_uses);
6169 free ((*ivs)->used_inv_expr);
6170 free (*ivs);
6171 *ivs = NULL;
6174 /* Dumps IVS to FILE. */
6176 static void
6177 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6179 const char *pref = " invariants ";
6180 unsigned i;
6181 comp_cost cost = iv_ca_cost (ivs);
6183 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6184 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6185 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6186 bitmap_print (file, ivs->cands, " candidates: ","\n");
6188 for (i = 0; i < ivs->upto; i++)
6190 struct iv_use *use = iv_use (data, i);
6191 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6192 if (cp)
6193 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6194 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6195 else
6196 fprintf (file, " use:%d --> ??\n", use->id);
6199 for (i = 1; i <= data->max_inv_id; i++)
6200 if (ivs->n_invariant_uses[i])
6202 fprintf (file, "%s%d", pref, i);
6203 pref = ", ";
6205 fprintf (file, "\n\n");
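/* For illustration only, with made-up numbers, the dump produced by
   iv_ca_dump has roughly this shape (following the fprintf calls above):

     cost: 17 (complexity 2)
     cand_cost: 8
     cand_use_cost: 9 (complexity 2)
     candidates: 1, 3
     use:0 --> iv_cand:1, cost=(4,1)
     use:1 --> iv_cand:3, cost=(5,1)
      invariants 1, 2

   All numbers here are hypothetical.  */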
6208 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6209 of the new set, and store the differences in DELTA. The number of induction
6210 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6211 is true, the function tries to find a solution with minimal iv candidates. */
6213 static comp_cost
6214 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6215 struct iv_cand *cand, struct iv_ca_delta **delta,
6216 unsigned *n_ivs, bool min_ncand)
6218 unsigned i;
6219 comp_cost cost;
6220 struct iv_use *use;
6221 struct cost_pair *old_cp, *new_cp;
6223 *delta = NULL;
6224 for (i = 0; i < ivs->upto; i++)
6226 use = iv_use (data, i);
6227 old_cp = iv_ca_cand_for_use (ivs, use);
6229 if (old_cp
6230 && old_cp->cand == cand)
6231 continue;
6233 new_cp = get_use_iv_cost (data, use, cand);
6234 if (!new_cp)
6235 continue;
6237 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6238 continue;
6240 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6241 continue;
6243 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6246 iv_ca_delta_commit (data, ivs, *delta, true);
6247 cost = iv_ca_cost (ivs);
6248 if (n_ivs)
6249 *n_ivs = iv_ca_n_cands (ivs);
6250 iv_ca_delta_commit (data, ivs, *delta, false);
6252 return cost;
6255 /* Try narrowing set IVS by removing CAND. Return the cost of
6256 the new set and store the differences in DELTA. START is
6257 the candidate with which we start narrowing. */
6259 static comp_cost
6260 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6261 struct iv_cand *cand, struct iv_cand *start,
6262 struct iv_ca_delta **delta)
6264 unsigned i, ci;
6265 struct iv_use *use;
6266 struct cost_pair *old_cp, *new_cp, *cp;
6267 bitmap_iterator bi;
6268 struct iv_cand *cnd;
6269 comp_cost cost, best_cost, acost;
6271 *delta = NULL;
6272 for (i = 0; i < n_iv_uses (data); i++)
6274 use = iv_use (data, i);
6276 old_cp = iv_ca_cand_for_use (ivs, use);
6277 if (old_cp->cand != cand)
6278 continue;
6280 best_cost = iv_ca_cost (ivs);
6281 /* Start narrowing with START. */
6282 new_cp = get_use_iv_cost (data, use, start);
6284 if (data->consider_all_candidates)
6286 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6288 if (ci == cand->id || (start && ci == start->id))
6289 continue;
6291 cnd = iv_cand (data, ci);
6293 cp = get_use_iv_cost (data, use, cnd);
6294 if (!cp)
6295 continue;
6297 iv_ca_set_cp (data, ivs, use, cp);
6298 acost = iv_ca_cost (ivs);
6300 if (compare_costs (acost, best_cost) < 0)
6302 best_cost = acost;
6303 new_cp = cp;
6307 else
6309 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6311 if (ci == cand->id || (start && ci == start->id))
6312 continue;
6314 cnd = iv_cand (data, ci);
6316 cp = get_use_iv_cost (data, use, cnd);
6317 if (!cp)
6318 continue;
6320 iv_ca_set_cp (data, ivs, use, cp);
6321 acost = iv_ca_cost (ivs);
6323 if (compare_costs (acost, best_cost) < 0)
6325 best_cost = acost;
6326 new_cp = cp;
6330 /* Restore the old cp for this use. */
6331 iv_ca_set_cp (data, ivs, use, old_cp);
6333 if (!new_cp)
6335 iv_ca_delta_free (delta);
6336 return infinite_cost;
6339 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6342 iv_ca_delta_commit (data, ivs, *delta, true);
6343 cost = iv_ca_cost (ivs);
6344 iv_ca_delta_commit (data, ivs, *delta, false);
6346 return cost;
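/* A note on the narrowing above: removing CAND means every use currently
   served by CAND must be re-assigned to the cheapest remaining candidate
   in IVS (or to START).  If some use has no alternative candidate at all,
   the narrowing cannot succeed, which is why infinite_cost is returned in
   that case.  */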
6349 /* Try optimizing the set of candidates IVS by removing candidates other
6350 than EXCEPT_CAND from it. Return the cost of the new set, and store
6351 differences in DELTA. */
6353 static comp_cost
6354 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6355 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6357 bitmap_iterator bi;
6358 struct iv_ca_delta *act_delta, *best_delta;
6359 unsigned i;
6360 comp_cost best_cost, acost;
6361 struct iv_cand *cand;
6363 best_delta = NULL;
6364 best_cost = iv_ca_cost (ivs);
6366 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6368 cand = iv_cand (data, i);
6370 if (cand == except_cand)
6371 continue;
6373 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6375 if (compare_costs (acost, best_cost) < 0)
6377 best_cost = acost;
6378 iv_ca_delta_free (&best_delta);
6379 best_delta = act_delta;
6381 else
6382 iv_ca_delta_free (&act_delta);
6385 if (!best_delta)
6387 *delta = NULL;
6388 return best_cost;
6391 /* Recurse to possibly remove other unnecessary ivs. */
6392 iv_ca_delta_commit (data, ivs, best_delta, true);
6393 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6394 iv_ca_delta_commit (data, ivs, best_delta, false);
6395 *delta = iv_ca_delta_join (best_delta, *delta);
6396 return best_cost;
6399 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6400 cheaper local cost for USE than BEST_CP. If so, return a pointer
6401 to the corresponding cost_pair; otherwise just return BEST_CP. */
6403 static struct cost_pair*
6404 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6405 unsigned int cand_idx, struct iv_cand *old_cand,
6406 struct cost_pair *best_cp)
6408 struct iv_cand *cand;
6409 struct cost_pair *cp;
6411 gcc_assert (old_cand != NULL && best_cp != NULL);
6412 if (cand_idx == old_cand->id)
6413 return best_cp;
6415 cand = iv_cand (data, cand_idx);
6416 cp = get_use_iv_cost (data, use, cand);
6417 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6418 return cp;
6420 return best_cp;
6423 /* Try breaking the locally optimal fixed point for IVS by replacing
6424 candidates that are used by more than one iv use. For each such
6425 candidate, this function tries to represent the iv uses under that
6426 candidate using other candidates with lower local cost, then tries to
6427 prune the new set. If the new set has a lower cost, it returns the new
6428 cost after recording the candidate replacements in the list DELTA. */
6430 static comp_cost
6431 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6432 struct iv_ca_delta **delta)
6434 bitmap_iterator bi, bj;
6435 unsigned int i, j, k;
6436 struct iv_use *use;
6437 struct iv_cand *cand;
6438 comp_cost orig_cost, acost;
6439 struct iv_ca_delta *act_delta, *tmp_delta;
6440 struct cost_pair *old_cp, *best_cp = NULL;
6442 *delta = NULL;
6443 orig_cost = iv_ca_cost (ivs);
6445 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6447 if (ivs->n_cand_uses[i] == 1
6448 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6449 continue;
6451 cand = iv_cand (data, i);
6453 act_delta = NULL;
6454 /* Represent uses under current candidate using other ones with
6455 lower local cost. */
6456 for (j = 0; j < ivs->upto; j++)
6458 use = iv_use (data, j);
6459 old_cp = iv_ca_cand_for_use (ivs, use);
6461 if (old_cp->cand != cand)
6462 continue;
6464 best_cp = old_cp;
6465 if (data->consider_all_candidates)
6466 for (k = 0; k < n_iv_cands (data); k++)
6467 best_cp = cheaper_cost_with_cand (data, use, k,
6468 old_cp->cand, best_cp);
6469 else
6470 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6471 best_cp = cheaper_cost_with_cand (data, use, k,
6472 old_cp->cand, best_cp);
6474 if (best_cp == old_cp)
6475 continue;
6477 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6479 /* No need for further pruning. */
6480 if (!act_delta)
6481 continue;
6483 /* Prune the new candidate set. */
6484 iv_ca_delta_commit (data, ivs, act_delta, true);
6485 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6486 iv_ca_delta_commit (data, ivs, act_delta, false);
6487 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6489 if (compare_costs (acost, orig_cost) < 0)
6491 *delta = act_delta;
6492 return acost;
6494 else
6495 iv_ca_delta_free (&act_delta);
6498 return orig_cost;
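/* A sketch of the replacement idea, with hypothetical candidates: if
   candidate C serves uses U1 and U2, but C1 is locally cheaper for U1 and
   C2 is locally cheaper for U2, the loop above re-expresses U1 and U2
   using C1 and C2 and then lets iv_ca_prune remove C.  The combined delta
   is kept only if the overall cost really drops.  */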
6501 /* Tries to extend the set IVS in the best possible way in order
6502 to express USE. If ORIGINALP is true, prefer candidates from
6503 the original set of IVs, otherwise favor important candidates not
6504 based on any memory object. */
6506 static bool
6507 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6508 struct iv_use *use, bool originalp)
6510 comp_cost best_cost, act_cost;
6511 unsigned i;
6512 bitmap_iterator bi;
6513 struct iv_cand *cand;
6514 struct iv_ca_delta *best_delta = NULL, *act_delta;
6515 struct cost_pair *cp;
6517 iv_ca_add_use (data, ivs, use);
6518 best_cost = iv_ca_cost (ivs);
6519 cp = iv_ca_cand_for_use (ivs, use);
6520 if (cp)
6522 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6523 iv_ca_set_no_cp (data, ivs, use);
6526 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6527 first try important candidates not based on any memory object. Only if
6528 this fails, try the specific ones. Rationale: in loops with many
6529 variables the best choice is often to use just one generic biv. If we
6530 added many use-specific ivs here, the optimization algorithm later
6531 would be likely to get stuck in a local minimum, thus causing us to create
6532 too many ivs. The approach from few ivs to more seems more likely to be
6533 successful -- starting from few ivs, replacing an expensive use by a
6534 specific iv should always be a win. */
6535 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6537 cand = iv_cand (data, i);
6539 if (originalp && cand->pos != IP_ORIGINAL)
6540 continue;
6542 if (!originalp && cand->iv->base_object != NULL_TREE)
6543 continue;
6545 if (iv_ca_cand_used_p (ivs, cand))
6546 continue;
6548 cp = get_use_iv_cost (data, use, cand);
6549 if (!cp)
6550 continue;
6552 iv_ca_set_cp (data, ivs, use, cp);
6553 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6554 true);
6555 iv_ca_set_no_cp (data, ivs, use);
6556 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6558 if (compare_costs (act_cost, best_cost) < 0)
6560 best_cost = act_cost;
6562 iv_ca_delta_free (&best_delta);
6563 best_delta = act_delta;
6565 else
6566 iv_ca_delta_free (&act_delta);
6569 if (infinite_cost_p (best_cost))
6571 for (i = 0; i < use->n_map_members; i++)
6573 cp = use->cost_map + i;
6574 cand = cp->cand;
6575 if (!cand)
6576 continue;
6578 /* Already tried this. */
6579 if (cand->important)
6581 if (originalp && cand->pos == IP_ORIGINAL)
6582 continue;
6583 if (!originalp && cand->iv->base_object == NULL_TREE)
6584 continue;
6587 if (iv_ca_cand_used_p (ivs, cand))
6588 continue;
6590 act_delta = NULL;
6591 iv_ca_set_cp (data, ivs, use, cp);
6592 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6593 iv_ca_set_no_cp (data, ivs, use);
6594 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6595 cp, act_delta);
6597 if (compare_costs (act_cost, best_cost) < 0)
6599 best_cost = act_cost;
6601 if (best_delta)
6602 iv_ca_delta_free (&best_delta);
6603 best_delta = act_delta;
6605 else
6606 iv_ca_delta_free (&act_delta);
6610 iv_ca_delta_commit (data, ivs, best_delta, true);
6611 iv_ca_delta_free (&best_delta);
6613 return !infinite_cost_p (best_cost);
6616 /* Finds an initial assignment of candidates to uses. */
6618 static struct iv_ca *
6619 get_initial_solution (struct ivopts_data *data, bool originalp)
6621 struct iv_ca *ivs = iv_ca_new (data);
6622 unsigned i;
6624 for (i = 0; i < n_iv_uses (data); i++)
6625 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6627 iv_ca_free (&ivs);
6628 return NULL;
6631 return ivs;
6634 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6635 points to a bool variable; if it is true, this function tries to break
6636 the locally optimal fixed point by replacing candidates in IVS. */
6638 static bool
6639 try_improve_iv_set (struct ivopts_data *data,
6640 struct iv_ca *ivs, bool *try_replace_p)
6642 unsigned i, n_ivs;
6643 comp_cost acost, best_cost = iv_ca_cost (ivs);
6644 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6645 struct iv_cand *cand;
6647 /* Try extending the set of induction variables by one. */
6648 for (i = 0; i < n_iv_cands (data); i++)
6650 cand = iv_cand (data, i);
6652 if (iv_ca_cand_used_p (ivs, cand))
6653 continue;
6655 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6656 if (!act_delta)
6657 continue;
6659 /* If we successfully added the candidate and the set is small enough,
6660 try optimizing it by removing other candidates. */
6661 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6663 iv_ca_delta_commit (data, ivs, act_delta, true);
6664 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6665 iv_ca_delta_commit (data, ivs, act_delta, false);
6666 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6669 if (compare_costs (acost, best_cost) < 0)
6671 best_cost = acost;
6672 iv_ca_delta_free (&best_delta);
6673 best_delta = act_delta;
6675 else
6676 iv_ca_delta_free (&act_delta);
6679 if (!best_delta)
6681 /* Try removing the candidates from the set instead. */
6682 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6684 if (!best_delta && *try_replace_p)
6686 *try_replace_p = false;
6687 /* So far the candidate selection algorithm tends to choose few IVs
6688 so that it can handle cases in which loops have many variables
6689 but the best choice is often to use only one general biv. One
6690 weakness is that it can't handle the opposite cases, in which
6691 different candidates should be chosen for different uses. To
6692 solve this problem, we replace candidates in the manner described
6693 in the comments of iv_ca_replace, thus giving the general
6694 algorithm a chance to break the locally optimal fixed point. */
6695 best_cost = iv_ca_replace (data, ivs, &best_delta);
6698 if (!best_delta)
6699 return false;
6702 iv_ca_delta_commit (data, ivs, best_delta, true);
6703 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6704 iv_ca_delta_free (&best_delta);
6705 return true;
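/* A sketch of one improvement step performed by try_improve_iv_set:

     for each candidate C not yet used in IVS
       cost = iv_ca_extend (..., C, &delta, ...);   try adding C
       keep the cheapest delta seen so far
     if no extension helped, fall back to iv_ca_prune, then iv_ca_replace
     commit the best delta, if any

   Each call thus performs at most one extend/prune/replace round; the
   driver below iterates the function until no step lowers the cost.  */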
6708 /* Attempts to find the optimal set of induction variables. We use a simple
6709 greedy heuristic: we try to replace at most one candidate in the selected
6710 solution and remove the unused ivs while this improves the cost. */
6712 static struct iv_ca *
6713 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6715 struct iv_ca *set;
6716 bool try_replace_p = true;
6718 /* Get the initial solution. */
6719 set = get_initial_solution (data, originalp);
6720 if (!set)
6722 if (dump_file && (dump_flags & TDF_DETAILS))
6723 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6724 return NULL;
6727 if (dump_file && (dump_flags & TDF_DETAILS))
6729 fprintf (dump_file, "Initial set of candidates:\n");
6730 iv_ca_dump (data, dump_file, set);
6733 while (try_improve_iv_set (data, set, &try_replace_p))
6735 if (dump_file && (dump_flags & TDF_DETAILS))
6737 fprintf (dump_file, "Improved to:\n");
6738 iv_ca_dump (data, dump_file, set);
6742 return set;
6745 static struct iv_ca *
6746 find_optimal_iv_set (struct ivopts_data *data)
6748 unsigned i;
6749 struct iv_ca *set, *origset;
6750 struct iv_use *use;
6751 comp_cost cost, origcost;
6753 /* Determine the cost based on a strategy that starts with the original
6754 IVs, then try again using a strategy that prefers candidates not based
6755 on any IVs. */
6756 origset = find_optimal_iv_set_1 (data, true);
6757 set = find_optimal_iv_set_1 (data, false);
6759 if (!origset && !set)
6760 return NULL;
6762 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6763 cost = set ? iv_ca_cost (set) : infinite_cost;
6765 if (dump_file && (dump_flags & TDF_DETAILS))
6767 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6768 origcost.cost, origcost.complexity);
6769 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6770 cost.cost, cost.complexity);
6773 /* Choose the one with the best cost. */
6774 if (compare_costs (origcost, cost) <= 0)
6776 if (set)
6777 iv_ca_free (&set);
6778 set = origset;
6780 else if (origset)
6781 iv_ca_free (&origset);
6783 for (i = 0; i < n_iv_uses (data); i++)
6785 use = iv_use (data, i);
6786 use->selected = iv_ca_cand_for_use (set, use)->cand;
6789 return set;
6792 /* Creates a new induction variable corresponding to CAND. */
6794 static void
6795 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6797 gimple_stmt_iterator incr_pos;
6798 tree base;
6799 bool after = false;
6801 if (!cand->iv)
6802 return;
6804 switch (cand->pos)
6806 case IP_NORMAL:
6807 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6808 break;
6810 case IP_END:
6811 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6812 after = true;
6813 break;
6815 case IP_AFTER_USE:
6816 after = true;
6817 /* fall through */
6818 case IP_BEFORE_USE:
6819 incr_pos = gsi_for_stmt (cand->incremented_at);
6820 break;
6822 case IP_ORIGINAL:
6823 /* Mark that the iv is preserved. */
6824 name_info (data, cand->var_before)->preserve_biv = true;
6825 name_info (data, cand->var_after)->preserve_biv = true;
6827 /* Rewrite the increment so that it uses var_before directly. */
6828 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6829 return;
6832 gimple_add_tmp_var (cand->var_before);
6834 base = unshare_expr (cand->iv->base);
6836 create_iv (base, unshare_expr (cand->iv->step),
6837 cand->var_before, data->current_loop,
6838 &incr_pos, after, &cand->var_before, &cand->var_after);
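/* For illustration (a sketch, not the exact statements create_iv emits):
   for a candidate with base B and step S placed at IP_NORMAL, the
   generated gimple is roughly

     # var_before = PHI <B (preheader), var_after (latch)>
     ...
     var_after = var_before + S;

   with the increment inserted just before the exit test of the loop.  */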
6841 /* Creates new induction variables described in SET. */
6843 static void
6844 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6846 unsigned i;
6847 struct iv_cand *cand;
6848 bitmap_iterator bi;
6850 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6852 cand = iv_cand (data, i);
6853 create_new_iv (data, cand);
6856 if (dump_file && (dump_flags & TDF_DETAILS))
6858 fprintf (dump_file, "Selected IV set for loop %d",
6859 data->current_loop->num);
6860 if (data->loop_loc != UNKNOWN_LOCATION)
6861 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6862 LOCATION_LINE (data->loop_loc));
6863 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6864 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6866 cand = iv_cand (data, i);
6867 dump_cand (dump_file, cand);
6869 fprintf (dump_file, "\n");
6873 /* Rewrites USE (definition of iv used in a nonlinear expression)
6874 using candidate CAND. */
6876 static void
6877 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6878 struct iv_use *use, struct iv_cand *cand)
6880 tree comp;
6881 tree op, tgt;
6882 gassign *ass;
6883 gimple_stmt_iterator bsi;
6885 /* An important special case -- if we are asked to express value of
6886 the original iv by itself, just exit; there is no need to
6887 introduce a new computation (that might also need casting the
6888 variable to unsigned and back). */
6889 if (cand->pos == IP_ORIGINAL
6890 && cand->incremented_at == use->stmt)
6892 enum tree_code stmt_code;
6894 gcc_assert (is_gimple_assign (use->stmt));
6895 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6897 /* Check whether we may leave the computation unchanged.
6898 This is the case only if it does not rely on other
6899 computations in the loop -- otherwise, the computation
6900 we rely upon may be removed in remove_unused_ivs,
6901 thus leading to an ICE. */
6902 stmt_code = gimple_assign_rhs_code (use->stmt);
6903 if (stmt_code == PLUS_EXPR
6904 || stmt_code == MINUS_EXPR
6905 || stmt_code == POINTER_PLUS_EXPR)
6907 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs2 (use->stmt);
6909 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6910 op = gimple_assign_rhs1 (use->stmt);
6911 else
6912 op = NULL_TREE;
6914 else
6915 op = NULL_TREE;
6917 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6918 return;
6921 comp = get_computation (data->current_loop, use, cand);
6922 gcc_assert (comp != NULL_TREE);
6924 switch (gimple_code (use->stmt))
6926 case GIMPLE_PHI:
6927 tgt = PHI_RESULT (use->stmt);
6929 /* If we should keep the biv, do not replace it. */
6930 if (name_info (data, tgt)->preserve_biv)
6931 return;
6933 bsi = gsi_after_labels (gimple_bb (use->stmt));
6934 break;
6936 case GIMPLE_ASSIGN:
6937 tgt = gimple_assign_lhs (use->stmt);
6938 bsi = gsi_for_stmt (use->stmt);
6939 break;
6941 default:
6942 gcc_unreachable ();
6945 if (!valid_gimple_rhs_p (comp)
6946 || (gimple_code (use->stmt) != GIMPLE_PHI
6947 /* We can't allow re-allocating the stmt as it might still be
6948 pointed to. */
6949 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6950 >= gimple_num_ops (gsi_stmt (bsi)))))
6952 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6953 true, GSI_SAME_STMT);
6954 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6956 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6957 /* As this isn't a plain copy, we have to reset alignment
6958 information. */
6959 if (SSA_NAME_PTR_INFO (comp))
6960 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6964 if (gimple_code (use->stmt) == GIMPLE_PHI)
6966 ass = gimple_build_assign (tgt, comp);
6967 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6969 bsi = gsi_for_stmt (use->stmt);
6970 remove_phi_node (&bsi, false);
6972 else
6974 gimple_assign_set_rhs_from_tree (&bsi, comp);
6975 use->stmt = gsi_stmt (bsi);
6979 /* Performs a peephole optimization to reorder the iv update statement with
6980 a mem ref to enable instruction combining in later phases. The mem ref uses
6981 the iv value before the update, so the reordering transformation requires
6982 adjustment of the offset. CAND is the selected IV_CAND.
6984 Example:
6986 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6987 iv2 = iv1 + 1;
6989 if (t < val) (1)
6990 goto L;
6991 goto Head;
6994 directly propagating t over to (1) will introduce an overlapping live
6995 range, thus increasing register pressure. This peephole transforms it into:
6998 iv2 = iv1 + 1;
6999 t = MEM_REF (base, iv2, 8, 8);
7000 if (t < val)
7001 goto L;
7002 goto Head;
7005 static void
7006 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7008 tree var_after;
7009 gimple *iv_update, *stmt;
7010 basic_block bb;
7011 gimple_stmt_iterator gsi, gsi_iv;
7013 if (cand->pos != IP_NORMAL)
7014 return;
7016 var_after = cand->var_after;
7017 iv_update = SSA_NAME_DEF_STMT (var_after);
7019 bb = gimple_bb (iv_update);
7020 gsi = gsi_last_nondebug_bb (bb);
7021 stmt = gsi_stmt (gsi);
7023 /* Only handle conditional statements for now. */
7024 if (gimple_code (stmt) != GIMPLE_COND)
7025 return;
7027 gsi_prev_nondebug (&gsi);
7028 stmt = gsi_stmt (gsi);
7029 if (stmt != iv_update)
7030 return;
7032 gsi_prev_nondebug (&gsi);
7033 if (gsi_end_p (gsi))
7034 return;
7036 stmt = gsi_stmt (gsi);
7037 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7038 return;
7040 if (stmt != use->stmt)
7041 return;
7043 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7044 return;
7046 if (dump_file && (dump_flags & TDF_DETAILS))
7048 fprintf (dump_file, "Reordering \n");
7049 print_gimple_stmt (dump_file, iv_update, 0, 0);
7050 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7051 fprintf (dump_file, "\n");
7054 gsi = gsi_for_stmt (use->stmt);
7055 gsi_iv = gsi_for_stmt (iv_update);
7056 gsi_move_before (&gsi_iv, &gsi);
7058 cand->pos = IP_BEFORE_USE;
7059 cand->incremented_at = use->stmt;
7062 /* Rewrites USE (address that is an iv) using candidate CAND. */
7064 static void
7065 rewrite_use_address_1 (struct ivopts_data *data,
7066 struct iv_use *use, struct iv_cand *cand)
7068 aff_tree aff;
7069 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7070 tree base_hint = NULL_TREE;
7071 tree ref, iv;
7072 bool ok;
7074 adjust_iv_update_pos (cand, use);
7075 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7076 gcc_assert (ok);
7077 unshare_aff_combination (&aff);
7079 /* To avoid undefined overflow problems, all IV candidates use unsigned
7080 integer types. The drawback is that this makes it impossible for
7081 create_mem_ref to distinguish an IV that is based on a memory object
7082 from one that represents simply an offset.
7084 To work around this problem, we pass a hint to create_mem_ref that
7085 indicates which variable (if any) in aff is an IV based on a memory
7086 object. Note that we only consider the candidate. If this is not
7087 based on an object, the base of the reference is in some subexpression
7088 of the use -- but these will use pointer types, so they are recognized
7089 by the create_mem_ref heuristics anyway. */
7090 if (cand->iv->base_object)
7091 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7093 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7094 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7095 reference_alias_ptr_type (*use->op_p),
7096 iv, base_hint, data->speed);
7097 copy_ref_info (ref, *use->op_p);
7098 *use->op_p = ref;
7101 /* Rewrites USE (address that is an iv) using candidate CAND. If it is the
7102 first use of a group, rewrites the sub-uses in the group too. */
7104 static void
7105 rewrite_use_address (struct ivopts_data *data,
7106 struct iv_use *use, struct iv_cand *cand)
7108 struct iv_use *next;
7110 gcc_assert (use->sub_id == 0);
7111 rewrite_use_address_1 (data, use, cand);
7112 update_stmt (use->stmt);
7114 for (next = use->next; next != NULL; next = next->next)
7116 rewrite_use_address_1 (data, next, cand);
7117 update_stmt (next->stmt);
7120 return;
7123 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7124 candidate CAND. */
7126 static void
7127 rewrite_use_compare (struct ivopts_data *data,
7128 struct iv_use *use, struct iv_cand *cand)
7130 tree comp, *var_p, op, bound;
7131 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7132 enum tree_code compare;
7133 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7134 bool ok;
7136 bound = cp->value;
7137 if (bound)
7139 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7140 tree var_type = TREE_TYPE (var);
7141 gimple_seq stmts;
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7145 fprintf (dump_file, "Replacing exit test: ");
7146 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7148 compare = cp->comp;
7149 bound = unshare_expr (fold_convert (var_type, bound));
7150 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7151 if (stmts)
7152 gsi_insert_seq_on_edge_immediate (
7153 loop_preheader_edge (data->current_loop),
7154 stmts);
7156 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7157 gimple_cond_set_lhs (cond_stmt, var);
7158 gimple_cond_set_code (cond_stmt, compare);
7159 gimple_cond_set_rhs (cond_stmt, op);
7160 return;
7163 /* The induction variable elimination failed; just express the original
7164 giv. */
7165 comp = get_computation (data->current_loop, use, cand);
7166 gcc_assert (comp != NULL_TREE);
7168 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7169 gcc_assert (ok);
7171 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7172 true, GSI_SAME_STMT);
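/* For illustration only, with hypothetical SSA names: when iv elimination
   succeeds, an exit test such as

     if (i_1 < n_2)

   is rewritten above into a test against the precomputed bound, e.g.

     if (ivtmp_5 != _7)

   where _7 is gimplified on the preheader edge.  If elimination failed,
   the original compared value is simply re-expressed using CAND.  */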
7175 /* Rewrites USE using candidate CAND. */
7177 static void
7178 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7180 switch (use->type)
7182 case USE_NONLINEAR_EXPR:
7183 rewrite_use_nonlinear_expr (data, use, cand);
7184 break;
7186 case USE_ADDRESS:
7187 rewrite_use_address (data, use, cand);
7188 break;
7190 case USE_COMPARE:
7191 rewrite_use_compare (data, use, cand);
7192 break;
7194 default:
7195 gcc_unreachable ();
7198 update_stmt (use->stmt);
7201 /* Rewrite the uses using the selected induction variables. */
7203 static void
7204 rewrite_uses (struct ivopts_data *data)
7206 unsigned i;
7207 struct iv_cand *cand;
7208 struct iv_use *use;
7210 for (i = 0; i < n_iv_uses (data); i++)
7212 use = iv_use (data, i);
7213 cand = use->selected;
7214 gcc_assert (cand);
7216 rewrite_use (data, use, cand);
7220 /* Removes the ivs that are not used after rewriting. */
7222 static void
7223 remove_unused_ivs (struct ivopts_data *data)
7225 unsigned j;
7226 bitmap_iterator bi;
7227 bitmap toremove = BITMAP_ALLOC (NULL);
7229 /* Figure out an order in which to release SSA DEFs so that we don't
7230 release something that we'd have to propagate into a debug stmt
7231 afterwards. */
7232 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7234 struct version_info *info;
7236 info = ver_info (data, j);
7237 if (info->iv
7238 && !integer_zerop (info->iv->step)
7239 && !info->inv_id
7240 && !info->iv->have_use_for
7241 && !info->preserve_biv)
7243 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7245 tree def = info->iv->ssa_name;
7247 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7249 imm_use_iterator imm_iter;
7250 use_operand_p use_p;
7251 gimple *stmt;
7252 int count = 0;
7254 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7256 if (!gimple_debug_bind_p (stmt))
7257 continue;
7259 /* We just want to determine whether to do nothing
7260 (count == 0), to substitute the computed
7261 expression into a single use of the SSA DEF by
7262 itself (count == 1), or to use a debug temp
7263 because the SSA DEF is used multiple times or as
7264 part of a larger expression (count > 1). */
7265 count++;
7266 if (gimple_debug_bind_get_value (stmt) != def)
7267 count++;
7269 if (count > 1)
7270 BREAK_FROM_IMM_USE_STMT (imm_iter);
7273 if (!count)
7274 continue;
7276 struct iv_use dummy_use;
7277 struct iv_cand *best_cand = NULL, *cand;
7278 unsigned i, best_pref = 0, cand_pref;
7280 memset (&dummy_use, 0, sizeof (dummy_use));
7281 dummy_use.iv = info->iv;
7282 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7284 cand = iv_use (data, i)->selected;
7285 if (cand == best_cand)
7286 continue;
7287 cand_pref = operand_equal_p (cand->iv->step,
7288 info->iv->step, 0)
7289 ? 4 : 0;
7290 cand_pref
7291 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7292 == TYPE_MODE (TREE_TYPE (info->iv->base))
7293 ? 2 : 0;
7294 cand_pref
7295 += TREE_CODE (cand->iv->base) == INTEGER_CST
7296 ? 1 : 0;
7297 if (best_cand == NULL || best_pref < cand_pref)
7299 best_cand = cand;
7300 best_pref = cand_pref;
7304 if (!best_cand)
7305 continue;
7307 tree comp = get_computation_at (data->current_loop,
7308 &dummy_use, best_cand,
7309 SSA_NAME_DEF_STMT (def));
7310 if (!comp)
7311 continue;
7313 if (count > 1)
7315 tree vexpr = make_node (DEBUG_EXPR_DECL);
7316 DECL_ARTIFICIAL (vexpr) = 1;
7317 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7318 if (SSA_NAME_VAR (def))
7319 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7320 else
7321 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7322 gdebug *def_temp
7323 = gimple_build_debug_bind (vexpr, comp, NULL);
7324 gimple_stmt_iterator gsi;
7326 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7327 gsi = gsi_after_labels (gimple_bb
7328 (SSA_NAME_DEF_STMT (def)));
7329 else
7330 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7332 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7333 comp = vexpr;
7336 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7338 if (!gimple_debug_bind_p (stmt))
7339 continue;
7341 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7342 SET_USE (use_p, comp);
7344 update_stmt (stmt);
7350 release_defs_bitset (toremove);
7352 BITMAP_FREE (toremove);
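/* For illustration, assuming a removed iv I that several debug binds
   reference: the code above first emits a debug temporary

     # DEBUG D#1 => <I expressed in terms of the selected candidate>

   and then rewrites each bind to

     # DEBUG I => D#1

   so the user variable stays inspectable after I itself is released
   (a sketch; the exact dump syntax may differ).  */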
7355 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7356 for hash_map::traverse. */
7358 bool
7359 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7361 free (value);
7362 return true;
7365 /* Frees data allocated by the optimization of a single loop. */
7367 static void
7368 free_loop_data (struct ivopts_data *data)
7370 unsigned i, j;
7371 bitmap_iterator bi;
7372 tree obj;
7374 if (data->niters)
7376 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7377 delete data->niters;
7378 data->niters = NULL;
7381 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7383 struct version_info *info;
7385 info = ver_info (data, i);
7386 info->iv = NULL;
7387 info->has_nonlin_use = false;
7388 info->preserve_biv = false;
7389 info->inv_id = 0;
7391 bitmap_clear (data->relevant);
7392 bitmap_clear (data->important_candidates);
7394 for (i = 0; i < n_iv_uses (data); i++)
7396 struct iv_use *use = iv_use (data, i);
7397 struct iv_use *pre = use, *sub = use->next;
7399 while (sub)
7401 gcc_assert (sub->related_cands == NULL);
7402 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7404 pre = sub;
7405 sub = sub->next;
7406 free (pre);
7409 BITMAP_FREE (use->related_cands);
7410 for (j = 0; j < use->n_map_members; j++)
7411 if (use->cost_map[j].depends_on)
7412 BITMAP_FREE (use->cost_map[j].depends_on);
7413 free (use->cost_map);
7414 free (use);
7416 data->iv_uses.truncate (0);
7418 for (i = 0; i < n_iv_cands (data); i++)
7420 struct iv_cand *cand = iv_cand (data, i);
7422 if (cand->depends_on)
7423 BITMAP_FREE (cand->depends_on);
7424 free (cand);
7426 data->iv_candidates.truncate (0);
7428 if (data->version_info_size < num_ssa_names)
7430 data->version_info_size = 2 * num_ssa_names;
7431 free (data->version_info);
7432 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7435 data->max_inv_id = 0;
7437 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7438 SET_DECL_RTL (obj, NULL_RTX);
7440 decl_rtl_to_reset.truncate (0);
7442 data->inv_expr_tab->empty ();
7443 data->inv_expr_id = 0;
7446 /* Finalizes the data structures used by the iv optimization pass and
7447 stored in DATA. */
7449 static void
7450 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7452 free_loop_data (data);
7453 free (data->version_info);
7454 BITMAP_FREE (data->relevant);
7455 BITMAP_FREE (data->important_candidates);
7457 decl_rtl_to_reset.release ();
7458 data->iv_uses.release ();
7459 data->iv_candidates.release ();
7460 delete data->inv_expr_tab;
7461 data->inv_expr_tab = NULL;
7462 free_affine_expand_cache (&data->name_expansion_cache);
7463 obstack_free (&data->iv_obstack, NULL);
7466 /* Returns true if the loop body BODY includes any function calls. */
7468 static bool
7469 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7471 gimple_stmt_iterator gsi;
7472 unsigned i;
7474 for (i = 0; i < num_nodes; i++)
7475 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7477 gimple *stmt = gsi_stmt (gsi);
7478 if (is_gimple_call (stmt)
7479 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7480 return true;
7482 return false;
7485 /* Optimizes the LOOP. Returns true if anything changed. */
7487 static bool
7488 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7490 bool changed = false;
7491 struct iv_ca *iv_ca;
7492 edge exit = single_dom_exit (loop);
7493 basic_block *body;
7495 gcc_assert (!data->niters);
7496 data->current_loop = loop;
7497 data->loop_loc = find_loop_location (loop);
7498 data->speed = optimize_loop_for_speed_p (loop);
7500 if (dump_file && (dump_flags & TDF_DETAILS))
7502 fprintf (dump_file, "Processing loop %d", loop->num);
7503 if (data->loop_loc != UNKNOWN_LOCATION)
7504 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7505 LOCATION_LINE (data->loop_loc));
7506 fprintf (dump_file, "\n");
7508 if (exit)
7510 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7511 exit->src->index, exit->dest->index);
7512 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7513 fprintf (dump_file, "\n");
7516 fprintf (dump_file, "\n");
7519 body = get_loop_body (loop);
7520 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7521 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7522 free (body);
7524 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7526 /* For each ssa name, determine whether it behaves as an induction variable
7527 in some loop. */
7528 if (!find_induction_variables (data))
7529 goto finish;
7531 /* Finds interesting uses (item 1). */
7532 find_interesting_uses (data);
7533 group_address_uses (data);
7534 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7535 goto finish;
7537 /* Finds candidates for the induction variables (item 2). */
7538 find_iv_candidates (data);
7540 /* Calculates the costs (item 3, part 1). */
7541 determine_iv_costs (data);
7542 determine_use_iv_costs (data);
7543 determine_set_costs (data);
7545 /* Find the optimal set of induction variables (item 3, part 2). */
7546 iv_ca = find_optimal_iv_set (data);
7547 if (!iv_ca)
7548 goto finish;
7549 changed = true;
7551 /* Create the new induction variables (item 4, part 1). */
7552 create_new_ivs (data, iv_ca);
7553 iv_ca_free (&iv_ca);
7555 /* Rewrite the uses (item 4, part 2). */
7556 rewrite_uses (data);
7558 /* Remove the ivs that are unused after rewriting. */
7559 remove_unused_ivs (data);
7561 /* We have changed the structure of induction variables; it might happen
7562 that definitions in the scev database refer to some of them that were
7563 eliminated. */
7564 scev_reset ();
7566 finish:
7567 free_loop_data (data);
7569 return changed;
7572 /* Main entry point. Optimizes induction variables in loops. */
7574 void
7575 tree_ssa_iv_optimize (void)
7577 struct loop *loop;
7578 struct ivopts_data data;
7580 tree_ssa_iv_optimize_init (&data);
7582 /* Optimize the loops starting with the innermost ones. */
7583 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7585 if (dump_file && (dump_flags & TDF_DETAILS))
7586 flow_loop_dump (loop, dump_file, NULL, 1);
7588 tree_ssa_iv_optimize_loop (&data, loop);
7591 tree_ssa_iv_optimize_finalize (&data);