/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
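/* For illustration (a sketch of the intent, not literal output of this
   pass), strength reduction may rewrite an indexed loop

       for (i = 0; i < n; i++)
         a[i] = 0;

   so that the array access uses a single pointer induction variable

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   replacing the per-iteration address computation a + i * sizeof (*a)
   by a cheap pointer increment.  */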
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we would stop at any ssa var; since full expansion is
	 expensive and such cases are unusual, we just check the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we would stop at any ssa var; since full expansion is
     expensive and such cases are unusual, we just check the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
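/* E.g. (an illustrative sketch, with I standing for an arbitrary ssa name):
   for TOP = I * 8 - I and BOT = I this stores 7 to MUL, while for
   TOP = I * 8 and BOT = I * 4 it returns false, since BOT is only matched
   syntactically against subterms of TOP.  */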
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that; otherwise
   it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2207 /* Compute the maximum offset of the [base + offset] addressing mode
2208 for the memory reference represented by USE. */
2210 static HOST_WIDE_INT
2211 compute_max_addr_offset (struct iv_use *use)
2213 int width;
2214 rtx reg, addr;
2215 HOST_WIDE_INT i, off;
2216 unsigned list_index, num;
2217 addr_space_t as;
2218 machine_mode mem_mode, addr_mode;
2219 static vec<HOST_WIDE_INT> max_offset_list;
2221 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2222 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224 num = max_offset_list.length ();
2225 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2226 if (list_index >= num)
2228 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2229 for (; num < max_offset_list.length (); num++)
2230 max_offset_list[num] = -1;
2233 off = max_offset_list[list_index];
2234 if (off != -1)
2235 return off;
2237 addr_mode = targetm.addr_space.address_mode (as);
2238 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2239 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241 width = GET_MODE_BITSIZE (addr_mode) - 1;
2242 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2243 width = HOST_BITS_PER_WIDE_INT - 1;
2245 for (i = width; i > 0; i--)
2247 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2248 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2249 if (memory_address_addr_space_p (mem_mode, addr, as))
2250 break;
2252 /* For some strict-alignment targets, the offset must be naturally
2253 aligned. Try an aligned offset if mem_mode is not QImode. */
2254 off = ((unsigned HOST_WIDE_INT) 1 << i);
2255 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257 off -= GET_MODE_SIZE (mem_mode);
2258 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2259 if (memory_address_addr_space_p (mem_mode, addr, as))
2260 break;
2263 if (i == 0)
2264 off = 0;
2266 max_offset_list[list_index] = off;
2267 return off;
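/* Editor's illustration (hedged sketch, assumed target, not part of the
   pass): on a hypothetical machine whose [base + offset] mode accepts
   13-bit signed offsets, the probe above first succeeds for i = 12,
   giving

     off = ((unsigned HOST_WIDE_INT) 1 << 12) - 1 = 4095

   for QImode, or the aligned fallback (1 << 12) - GET_MODE_SIZE (SImode)
   = 4092 for SImode on a strict-alignment target.  The result is cached
   in max_offset_list, keyed by (address space, mode).  */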
2270 /* Check if all small groups should be split. Return true if and
2271 only if:
2273 1) At least one group contains two uses with different offsets.
2274 2) No group contains more than two distinct offsets.
2276 Return false otherwise. We want to split such groups because:
2278 1) Small groups don't have much benefit and may interfere with
2279 general candidate selection.
2280 2) The problem size with only small groups is usually small, and
2281 the general algorithm can handle it well.
2283 TODO -- The above claim may not hold when auto-increment is supported. */
2285 static bool
2286 split_all_small_groups (struct ivopts_data *data)
2288 bool split_p = false;
2289 unsigned int i, n, distinct;
2290 struct iv_use *pre, *use;
2292 n = n_iv_uses (data);
2293 for (i = 0; i < n; i++)
2295 use = iv_use (data, i);
2296 if (!use->next)
2297 continue;
2299 distinct = 1;
2300 gcc_assert (use->type == USE_ADDRESS);
2301 for (pre = use, use = use->next; use; pre = use, use = use->next)
2303 if (pre->addr_offset != use->addr_offset)
2304 distinct++;
2306 if (distinct > 2)
2307 return false;
2309 if (distinct == 2)
2310 split_p = true;
2313 return split_p;
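/* Editor's illustration (assumed offsets, not from this file): for the
   address groups

     g1: {base, base + 4}        -- 2 distinct offsets
     g2: {base, base, base + 8}  -- 2 distinct offsets

   the function returns true and all small groups are split, whereas a
   single group {base, base + 4, base + 8} (3 distinct offsets) makes
   it return false so that grouping is kept.  */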
2316 /* For each group of address type uses, this function further groups
2317 these uses according to the maximum offset supported by the target's
2318 [base + offset] addressing mode. */
2320 static void
2321 group_address_uses (struct ivopts_data *data)
2323 HOST_WIDE_INT max_offset = -1;
2324 unsigned int i, n, sub_id;
2325 struct iv_use *pre, *use;
2326 unsigned HOST_WIDE_INT addr_offset_first;
2328 /* Reset max offset to split all small groups. */
2329 if (split_all_small_groups (data))
2330 max_offset = 0;
2332 n = n_iv_uses (data);
2333 for (i = 0; i < n; i++)
2335 use = iv_use (data, i);
2336 if (!use->next)
2337 continue;
2339 gcc_assert (use->type == USE_ADDRESS);
2340 if (max_offset != 0)
2341 max_offset = compute_max_addr_offset (use);
2343 while (use)
2345 sub_id = 0;
2346 addr_offset_first = use->addr_offset;
2347 /* Only uses whose offset relative to the first use fits in the
2348 offset part can be grouped together. */
2349 for (pre = use, use = use->next;
2350 use && (use->addr_offset - addr_offset_first
2351 <= (unsigned HOST_WIDE_INT) max_offset);
2352 pre = use, use = use->next)
2354 use->id = pre->id;
2355 use->sub_id = ++sub_id;
2358 /* Break the list and create new group. */
2359 if (use)
2361 pre->next = NULL;
2362 use->id = n_iv_uses (data);
2363 use->related_cands = BITMAP_ALLOC (NULL);
2364 data->iv_uses.safe_push (use);
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 dump_uses (dump_file, data);
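/* Editor's illustration (hypothetical max_offset = 4095): uses at
   offsets {0, 8, 4000, 4096, 4104} from the same base split into two
   groups, {0, 8, 4000} and {4096, 4104}, because 4096 - 0 no longer
   fits in the supported offset part.  Each new group gets a fresh id
   via n_iv_uses and its own related_cands bitmap.  */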
2373 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2374 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2375 we are at the top-level of the processed address. */
2377 static tree
2378 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2379 HOST_WIDE_INT *offset)
2381 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2382 enum tree_code code;
2383 tree type, orig_type = TREE_TYPE (expr);
2384 HOST_WIDE_INT off0, off1, st;
2385 tree orig_expr = expr;
2387 STRIP_NOPS (expr);
2389 type = TREE_TYPE (expr);
2390 code = TREE_CODE (expr);
2391 *offset = 0;
2393 switch (code)
2395 case INTEGER_CST:
2396 if (!cst_and_fits_in_hwi (expr)
2397 || integer_zerop (expr))
2398 return orig_expr;
2400 *offset = int_cst_value (expr);
2401 return build_int_cst (orig_type, 0);
2403 case POINTER_PLUS_EXPR:
2404 case PLUS_EXPR:
2405 case MINUS_EXPR:
2406 op0 = TREE_OPERAND (expr, 0);
2407 op1 = TREE_OPERAND (expr, 1);
2409 op0 = strip_offset_1 (op0, false, false, &off0);
2410 op1 = strip_offset_1 (op1, false, false, &off1);
2412 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2413 if (op0 == TREE_OPERAND (expr, 0)
2414 && op1 == TREE_OPERAND (expr, 1))
2415 return orig_expr;
2417 if (integer_zerop (op1))
2418 expr = op0;
2419 else if (integer_zerop (op0))
2421 if (code == MINUS_EXPR)
2422 expr = fold_build1 (NEGATE_EXPR, type, op1);
2423 else
2424 expr = op1;
2426 else
2427 expr = fold_build2 (code, type, op0, op1);
2429 return fold_convert (orig_type, expr);
2431 case MULT_EXPR:
2432 op1 = TREE_OPERAND (expr, 1);
2433 if (!cst_and_fits_in_hwi (op1))
2434 return orig_expr;
2436 op0 = TREE_OPERAND (expr, 0);
2437 op0 = strip_offset_1 (op0, false, false, &off0);
2438 if (op0 == TREE_OPERAND (expr, 0))
2439 return orig_expr;
2441 *offset = off0 * int_cst_value (op1);
2442 if (integer_zerop (op0))
2443 expr = op0;
2444 else
2445 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447 return fold_convert (orig_type, expr);
2449 case ARRAY_REF:
2450 case ARRAY_RANGE_REF:
2451 if (!inside_addr)
2452 return orig_expr;
2454 step = array_ref_element_size (expr);
2455 if (!cst_and_fits_in_hwi (step))
2456 break;
2458 st = int_cst_value (step);
2459 op1 = TREE_OPERAND (expr, 1);
2460 op1 = strip_offset_1 (op1, false, false, &off1);
2461 *offset = off1 * st;
2463 if (top_compref
2464 && integer_zerop (op1))
2466 /* Strip the component reference completely. */
2467 op0 = TREE_OPERAND (expr, 0);
2468 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2469 *offset += off0;
2470 return op0;
2472 break;
2474 case COMPONENT_REF:
2476 tree field;
2478 if (!inside_addr)
2479 return orig_expr;
2481 tmp = component_ref_field_offset (expr);
2482 field = TREE_OPERAND (expr, 1);
2483 if (top_compref
2484 && cst_and_fits_in_hwi (tmp)
2485 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487 HOST_WIDE_INT boffset, abs_off;
2489 /* Strip the component reference completely. */
2490 op0 = TREE_OPERAND (expr, 0);
2491 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2492 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2493 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2494 if (boffset < 0)
2495 abs_off = -abs_off;
2497 *offset = off0 + int_cst_value (tmp) + abs_off;
2498 return op0;
2501 break;
2503 case ADDR_EXPR:
2504 op0 = TREE_OPERAND (expr, 0);
2505 op0 = strip_offset_1 (op0, true, true, &off0);
2506 *offset += off0;
2508 if (op0 == TREE_OPERAND (expr, 0))
2509 return orig_expr;
2511 expr = build_fold_addr_expr (op0);
2512 return fold_convert (orig_type, expr);
2514 case MEM_REF:
2515 /* ??? Offset operand? */
2516 inside_addr = false;
2517 break;
2519 default:
2520 return orig_expr;
2523 /* Default handling of expressions for which we want to recurse into
2524 the first operand. */
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2527 *offset += off0;
2529 if (op0 == TREE_OPERAND (expr, 0)
2530 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2531 return orig_expr;
2533 expr = copy_node (expr);
2534 TREE_OPERAND (expr, 0) = op0;
2535 if (op1)
2536 TREE_OPERAND (expr, 1) = op1;
2538 /* Inside an address, we might strip the top-level component references,
2539 thus changing the type of the expression. Handling of ADDR_EXPR
2540 will fix that. */
2541 expr = fold_convert (orig_type, expr);
2543 return expr;
2546 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2548 static tree
2549 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 HOST_WIDE_INT off;
2552 tree core = strip_offset_1 (expr, false, false, &off);
2553 *offset = off;
2554 return core;
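/* Editor's illustration (assumed 4-byte int elements): for

     expr = &a[i + 3]

   strip_offset returns (roughly) &a[i] and sets *offset to 12, since
   the ARRAY_REF case above multiplies the stripped index offset 3 by
   the element size 4.  A plain INTEGER_CST such as 7 yields a zero
   base with *offset = 7.  */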
2557 /* Returns variant of TYPE that can be used as base for different uses.
2558 We return unsigned type with the same precision, which avoids problems
2559 with overflows. */
2561 static tree
2562 generic_type_for (tree type)
2564 if (POINTER_TYPE_P (type))
2565 return unsigned_type_for (type);
2567 if (TYPE_UNSIGNED (type))
2568 return type;
2570 return unsigned_type_for (type);
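/* Editor's note (sketch of the intent): generic_type_for maps both
   'int' and 'int *' to an unsigned integer type of the same precision,
   so non-original candidate arithmetic wraps instead of invoking
   signed-overflow undefined behaviour.  */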
2573 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2574 the bitmap in which we should store them. */
2576 static struct ivopts_data *fd_ivopts_data;
2577 static tree
2578 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 bitmap *depends_on = (bitmap *) data;
2581 struct version_info *info;
2583 if (TREE_CODE (*expr_p) != SSA_NAME)
2584 return NULL_TREE;
2585 info = name_info (fd_ivopts_data, *expr_p);
2587 if (!info->inv_id || info->has_nonlin_use)
2588 return NULL_TREE;
2590 if (!*depends_on)
2591 *depends_on = BITMAP_ALLOC (NULL);
2592 bitmap_set_bit (*depends_on, info->inv_id);
2594 return NULL_TREE;
2597 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2598 position to POS. If USE is not NULL, the candidate is set as related to
2599 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2600 replacement of the final value of the iv by a direct computation. */
2602 static struct iv_cand *
2603 add_candidate_1 (struct ivopts_data *data,
2604 tree base, tree step, bool important, enum iv_position pos,
2605 struct iv_use *use, gimple incremented_at)
2607 unsigned i;
2608 struct iv_cand *cand = NULL;
2609 tree type, orig_type;
2611 /* For non-original variables, make sure their values are computed in a type
2612 that does not invoke undefined behavior on overflows (since in general,
2613 we cannot prove that these induction variables are non-wrapping). */
2614 if (pos != IP_ORIGINAL)
2616 orig_type = TREE_TYPE (base);
2617 type = generic_type_for (orig_type);
2618 if (type != orig_type)
2620 base = fold_convert (type, base);
2621 step = fold_convert (type, step);
2625 for (i = 0; i < n_iv_cands (data); i++)
2627 cand = iv_cand (data, i);
2629 if (cand->pos != pos)
2630 continue;
2632 if (cand->incremented_at != incremented_at
2633 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2634 && cand->ainc_use != use))
2635 continue;
2637 if (!cand->iv)
2639 if (!base && !step)
2640 break;
2642 continue;
2645 if (!base && !step)
2646 continue;
2648 if (operand_equal_p (base, cand->iv->base, 0)
2649 && operand_equal_p (step, cand->iv->step, 0)
2650 && (TYPE_PRECISION (TREE_TYPE (base))
2651 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2652 break;
2655 if (i == n_iv_cands (data))
2657 cand = XCNEW (struct iv_cand);
2658 cand->id = i;
2660 if (!base && !step)
2661 cand->iv = NULL;
2662 else
2663 cand->iv = alloc_iv (data, base, step);
2665 cand->pos = pos;
2666 if (pos != IP_ORIGINAL && cand->iv)
2668 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2669 cand->var_after = cand->var_before;
2671 cand->important = important;
2672 cand->incremented_at = incremented_at;
2673 data->iv_candidates.safe_push (cand);
2675 if (step
2676 && TREE_CODE (step) != INTEGER_CST)
2678 fd_ivopts_data = data;
2679 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2682 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2683 cand->ainc_use = use;
2684 else
2685 cand->ainc_use = NULL;
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 dump_cand (dump_file, cand);
2691 if (important && !cand->important)
2693 cand->important = true;
2694 if (dump_file && (dump_flags & TDF_DETAILS))
2695 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2698 if (use)
2700 bitmap_set_bit (use->related_cands, i);
2701 if (dump_file && (dump_flags & TDF_DETAILS))
2702 fprintf (dump_file, "Candidate %d is related to use %d\n",
2703 cand->id, use->id);
2706 return cand;
2709 /* Returns true if incrementing the induction variable at the end of the LOOP
2710 is allowed.
2712 The purpose is to avoid splitting latch edge with a biv increment, thus
2713 creating a jump, possibly confusing other optimization passes and leaving
2714 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2715 is not available (so we do not have a better alternative), or if the latch
2716 edge is already nonempty. */
2718 static bool
2719 allow_ip_end_pos_p (struct loop *loop)
2721 if (!ip_normal_pos (loop))
2722 return true;
2724 if (!empty_block_p (ip_end_pos (loop)))
2725 return true;
2727 return false;
2730 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2731 Important field is set to IMPORTANT. */
2733 static void
2734 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2735 bool important, struct iv_use *use)
2737 basic_block use_bb = gimple_bb (use->stmt);
2738 machine_mode mem_mode;
2739 unsigned HOST_WIDE_INT cstepi;
2741 /* If we insert the increment in any position other than the standard
2742 ones, we must ensure that it is incremented once per iteration.
2743 It must not be in an inner nested loop, or one side of an if
2744 statement. */
2745 if (use_bb->loop_father != data->current_loop
2746 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2747 || stmt_could_throw_p (use->stmt)
2748 || !cst_and_fits_in_hwi (step))
2749 return;
2751 cstepi = int_cst_value (step);
2753 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2754 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2755 || USE_STORE_PRE_INCREMENT (mem_mode))
2756 && GET_MODE_SIZE (mem_mode) == cstepi)
2757 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2758 || USE_STORE_PRE_DECREMENT (mem_mode))
2759 && GET_MODE_SIZE (mem_mode) == -cstepi))
2761 enum tree_code code = MINUS_EXPR;
2762 tree new_base;
2763 tree new_step = step;
2765 if (POINTER_TYPE_P (TREE_TYPE (base)))
2767 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2768 code = POINTER_PLUS_EXPR;
2770 else
2771 new_step = fold_convert (TREE_TYPE (base), new_step);
2772 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2773 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2774 use->stmt);
2776 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2777 || USE_STORE_POST_INCREMENT (mem_mode))
2778 && GET_MODE_SIZE (mem_mode) == cstepi)
2779 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2780 || USE_STORE_POST_DECREMENT (mem_mode))
2781 && GET_MODE_SIZE (mem_mode) == -cstepi))
2783 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2784 use->stmt);
2788 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2789 position to POS. If USE is not NULL, the candidate is set as related to
2790 it. The candidate computation is scheduled at all available positions. */
2792 static void
2793 add_candidate (struct ivopts_data *data,
2794 tree base, tree step, bool important, struct iv_use *use)
2796 gcc_assert (use == NULL || use->sub_id == 0);
2798 if (ip_normal_pos (data->current_loop))
2799 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2800 if (ip_end_pos (data->current_loop)
2801 && allow_ip_end_pos_p (data->current_loop))
2802 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2804 if (use != NULL && use->type == USE_ADDRESS)
2805 add_autoinc_candidates (data, base, step, important, use);
2808 /* Adds standard iv candidates. */
2810 static void
2811 add_standard_iv_candidates (struct ivopts_data *data)
2813 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2815 /* The same for a double-integer type if it is still fast enough. */
2816 if (TYPE_PRECISION
2817 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2818 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2819 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2820 build_int_cst (long_integer_type_node, 1), true, NULL);
2822 /* The same for a double-integer type if it is still fast enough. */
2823 if (TYPE_PRECISION
2824 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2825 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2826 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2827 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2831 /* Adds candidates based on the old induction variable IV. */
2833 static void
2834 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2836 gimple phi;
2837 tree def;
2838 struct iv_cand *cand;
2840 add_candidate (data, iv->base, iv->step, true, NULL);
2842 /* The same, but with initial value zero. */
2843 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2844 add_candidate (data, size_int (0), iv->step, true, NULL);
2845 else
2846 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2847 iv->step, true, NULL);
2849 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2850 if (gimple_code (phi) == GIMPLE_PHI)
2852 /* Additionally record the possibility of leaving the original iv
2853 untouched. */
2854 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2855 /* Don't add candidate if it's from another PHI node because
2856 it's an affine iv appearing in the form of PEELED_CHREC. */
2857 phi = SSA_NAME_DEF_STMT (def);
2858 if (gimple_code (phi) != GIMPLE_PHI)
2860 cand = add_candidate_1 (data,
2861 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2862 SSA_NAME_DEF_STMT (def));
2863 cand->var_before = iv->ssa_name;
2864 cand->var_after = def;
2866 else
2867 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2871 /* Adds candidates based on the old induction variables. */
2873 static void
2874 add_old_ivs_candidates (struct ivopts_data *data)
2876 unsigned i;
2877 struct iv *iv;
2878 bitmap_iterator bi;
2880 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2882 iv = ver_info (data, i)->iv;
2883 if (iv && iv->biv_p && !integer_zerop (iv->step))
2884 add_old_iv_candidates (data, iv);
2888 /* Adds candidates based on the value of the induction variable IV and USE. */
2890 static void
2891 add_iv_value_candidates (struct ivopts_data *data,
2892 struct iv *iv, struct iv_use *use)
2894 unsigned HOST_WIDE_INT offset;
2895 tree base;
2896 tree basetype;
2898 add_candidate (data, iv->base, iv->step, false, use);
2900 /* The same, but with initial value zero. Make such a variable important,
2901 since it is generic enough so that possibly many uses may be based
2902 on it. */
2903 basetype = TREE_TYPE (iv->base);
2904 if (POINTER_TYPE_P (basetype))
2905 basetype = sizetype;
2906 add_candidate (data, build_int_cst (basetype, 0),
2907 iv->step, true, use);
2909 /* Third, try removing the constant offset. Make sure to even
2910 add a candidate for &a[0] vs. (T *)&a. */
2911 base = strip_offset (iv->base, &offset);
2912 if (offset
2913 || base != iv->base)
2914 add_candidate (data, base, iv->step, false, use);
2917 /* Adds candidates based on the uses. */
2919 static void
2920 add_derived_ivs_candidates (struct ivopts_data *data)
2922 unsigned i;
2924 for (i = 0; i < n_iv_uses (data); i++)
2926 struct iv_use *use = iv_use (data, i);
2928 if (!use)
2929 continue;
2931 switch (use->type)
2933 case USE_NONLINEAR_EXPR:
2934 case USE_COMPARE:
2935 case USE_ADDRESS:
2936 /* Just add the ivs based on the value of the iv used here. */
2937 add_iv_value_candidates (data, use->iv, use);
2938 break;
2940 default:
2941 gcc_unreachable ();
2946 /* Record important candidates and add them to related_cands bitmaps
2947 if needed. */
2949 static void
2950 record_important_candidates (struct ivopts_data *data)
2952 unsigned i;
2953 struct iv_use *use;
2955 for (i = 0; i < n_iv_cands (data); i++)
2957 struct iv_cand *cand = iv_cand (data, i);
2959 if (cand->important)
2960 bitmap_set_bit (data->important_candidates, i);
2963 data->consider_all_candidates = (n_iv_cands (data)
2964 <= CONSIDER_ALL_CANDIDATES_BOUND);
2966 if (data->consider_all_candidates)
2968 /* We will not need "related_cands" bitmaps in this case,
2969 so release them to decrease peak memory consumption. */
2970 for (i = 0; i < n_iv_uses (data); i++)
2972 use = iv_use (data, i);
2973 BITMAP_FREE (use->related_cands);
2976 else
2978 /* Add important candidates to the related_cands bitmaps. */
2979 for (i = 0; i < n_iv_uses (data); i++)
2980 bitmap_ior_into (iv_use (data, i)->related_cands,
2981 data->important_candidates);
2985 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2986 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2987 we allocate a simple list to every use. */
2989 static void
2990 alloc_use_cost_map (struct ivopts_data *data)
2992 unsigned i, size, s;
2994 for (i = 0; i < n_iv_uses (data); i++)
2996 struct iv_use *use = iv_use (data, i);
2998 if (data->consider_all_candidates)
2999 size = n_iv_cands (data);
3000 else
3002 s = bitmap_count_bits (use->related_cands);
3004 /* Round up to a power of two, so that taking the modulo by it is fast. */
3005 size = s ? (1 << ceil_log2 (s)) : 1;
3008 use->n_map_members = size;
3009 use->cost_map = XCNEWVEC (struct cost_pair, size);
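/* Editor's illustration: with, say, 5 related candidates the map is
   rounded up to size 1 << ceil_log2 (5) == 8, so a slot can later be
   found by masking instead of a division:

     slot = cand->id & (use->n_map_members - 1);   -- same as cand->id % 8

   as done in set_use_iv_cost / get_use_iv_cost below.  */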
3013 /* Returns description of computation cost of expression whose runtime
3014 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3016 static comp_cost
3017 new_cost (unsigned runtime, unsigned complexity)
3019 comp_cost cost;
3021 cost.cost = runtime;
3022 cost.complexity = complexity;
3024 return cost;
3027 /* Returns true if COST is infinite. */
3029 static bool
3030 infinite_cost_p (comp_cost cost)
3032 return cost.cost == INFTY;
3035 /* Adds costs COST1 and COST2. */
3037 static comp_cost
3038 add_costs (comp_cost cost1, comp_cost cost2)
3040 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3041 return infinite_cost;
3043 cost1.cost += cost2.cost;
3044 cost1.complexity += cost2.complexity;
3046 return cost1;
3048 /* Subtracts costs COST1 and COST2. */
3050 static comp_cost
3051 sub_costs (comp_cost cost1, comp_cost cost2)
3053 cost1.cost -= cost2.cost;
3054 cost1.complexity -= cost2.complexity;
3056 return cost1;
3059 /* Returns a negative number if COST1 < COST2, a positive number if
3060 COST1 > COST2, and 0 if COST1 = COST2. */
3062 static int
3063 compare_costs (comp_cost cost1, comp_cost cost2)
3065 if (cost1.cost == cost2.cost)
3066 return cost1.complexity - cost2.complexity;
3068 return cost1.cost - cost2.cost;
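/* Editor's illustration (made-up numbers): add_costs ((4,1), (3,0))
   yields (7,1); compare_costs ((7,1), (7,2)) is negative, so the pair
   with the same runtime cost but lower complexity is considered
   cheaper.  */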
3071 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
3072 on invariants DEPENDS_ON and that the value used in expressing it
3073 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3075 static void
3076 set_use_iv_cost (struct ivopts_data *data,
3077 struct iv_use *use, struct iv_cand *cand,
3078 comp_cost cost, bitmap depends_on, tree value,
3079 enum tree_code comp, int inv_expr_id)
3081 unsigned i, s;
3083 if (infinite_cost_p (cost))
3085 BITMAP_FREE (depends_on);
3086 return;
3089 if (data->consider_all_candidates)
3091 use->cost_map[cand->id].cand = cand;
3092 use->cost_map[cand->id].cost = cost;
3093 use->cost_map[cand->id].depends_on = depends_on;
3094 use->cost_map[cand->id].value = value;
3095 use->cost_map[cand->id].comp = comp;
3096 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3097 return;
3100 /* n_map_members is a power of two, so this computes modulo. */
3101 s = cand->id & (use->n_map_members - 1);
3102 for (i = s; i < use->n_map_members; i++)
3103 if (!use->cost_map[i].cand)
3104 goto found;
3105 for (i = 0; i < s; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3109 gcc_unreachable ();
3111 found:
3112 use->cost_map[i].cand = cand;
3113 use->cost_map[i].cost = cost;
3114 use->cost_map[i].depends_on = depends_on;
3115 use->cost_map[i].value = value;
3116 use->cost_map[i].comp = comp;
3117 use->cost_map[i].inv_expr_id = inv_expr_id;
3120 /* Gets cost of (USE, CANDIDATE) pair. */
3122 static struct cost_pair *
3123 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3124 struct iv_cand *cand)
3126 unsigned i, s;
3127 struct cost_pair *ret;
3129 if (!cand)
3130 return NULL;
3132 if (data->consider_all_candidates)
3134 ret = use->cost_map + cand->id;
3135 if (!ret->cand)
3136 return NULL;
3138 return ret;
3141 /* n_map_members is a power of two, so this computes modulo. */
3142 s = cand->id & (use->n_map_members - 1);
3143 for (i = s; i < use->n_map_members; i++)
3144 if (use->cost_map[i].cand == cand)
3145 return use->cost_map + i;
3146 else if (use->cost_map[i].cand == NULL)
3147 return NULL;
3148 for (i = 0; i < s; i++)
3149 if (use->cost_map[i].cand == cand)
3150 return use->cost_map + i;
3151 else if (use->cost_map[i].cand == NULL)
3152 return NULL;
3154 return NULL;
3157 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3158 static rtx
3159 produce_memory_decl_rtl (tree obj, int *regno)
3161 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3162 machine_mode address_mode = targetm.addr_space.address_mode (as);
3163 rtx x;
3165 gcc_assert (obj);
3166 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3168 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3169 x = gen_rtx_SYMBOL_REF (address_mode, name);
3170 SET_SYMBOL_REF_DECL (x, obj);
3171 x = gen_rtx_MEM (DECL_MODE (obj), x);
3172 set_mem_addr_space (x, as);
3173 targetm.encode_section_info (obj, x, true);
3175 else
3177 x = gen_raw_REG (address_mode, (*regno)++);
3178 x = gen_rtx_MEM (DECL_MODE (obj), x);
3179 set_mem_addr_space (x, as);
3182 return x;
3185 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3186 walk_tree. DATA contains the actual fake register number. */
3188 static tree
3189 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3191 tree obj = NULL_TREE;
3192 rtx x = NULL_RTX;
3193 int *regno = (int *) data;
3195 switch (TREE_CODE (*expr_p))
3197 case ADDR_EXPR:
3198 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3199 handled_component_p (*expr_p);
3200 expr_p = &TREE_OPERAND (*expr_p, 0))
3201 continue;
3202 obj = *expr_p;
3203 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3204 x = produce_memory_decl_rtl (obj, regno);
3205 break;
3207 case SSA_NAME:
3208 *ws = 0;
3209 obj = SSA_NAME_VAR (*expr_p);
3210 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3211 if (!obj)
3212 return NULL_TREE;
3213 if (!DECL_RTL_SET_P (obj))
3214 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3215 break;
3217 case VAR_DECL:
3218 case PARM_DECL:
3219 case RESULT_DECL:
3220 *ws = 0;
3221 obj = *expr_p;
3223 if (DECL_RTL_SET_P (obj))
3224 break;
3226 if (DECL_MODE (obj) == BLKmode)
3227 x = produce_memory_decl_rtl (obj, regno);
3228 else
3229 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3231 break;
3233 default:
3234 break;
3237 if (x)
3239 decl_rtl_to_reset.safe_push (obj);
3240 SET_DECL_RTL (obj, x);
3243 return NULL_TREE;
3246 /* Determines cost of the computation of EXPR. */
3248 static unsigned
3249 computation_cost (tree expr, bool speed)
3251 rtx_insn *seq;
3252 rtx rslt;
3253 tree type = TREE_TYPE (expr);
3254 unsigned cost;
3255 /* Avoid using hard regs in ways which may be unsupported. */
3256 int regno = LAST_VIRTUAL_REGISTER + 1;
3257 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3258 enum node_frequency real_frequency = node->frequency;
3260 node->frequency = NODE_FREQUENCY_NORMAL;
3261 crtl->maybe_hot_insn_p = speed;
3262 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3263 start_sequence ();
3264 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3265 seq = get_insns ();
3266 end_sequence ();
3267 default_rtl_profile ();
3268 node->frequency = real_frequency;
3270 cost = seq_cost (seq, speed);
3271 if (MEM_P (rslt))
3272 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3273 TYPE_ADDR_SPACE (type), speed);
3274 else if (!REG_P (rslt))
3275 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3277 return cost;
3280 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3282 static tree
3283 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3285 if (stmt_after_increment (loop, cand, stmt))
3286 return cand->var_after;
3287 else
3288 return cand->var_before;
3291 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3292 same precision that is at least as wide as the precision of TYPE, stores
3293 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3294 type of A and B. */
3296 static tree
3297 determine_common_wider_type (tree *a, tree *b)
3299 tree wider_type = NULL;
3300 tree suba, subb;
3301 tree atype = TREE_TYPE (*a);
3303 if (CONVERT_EXPR_P (*a))
3305 suba = TREE_OPERAND (*a, 0);
3306 wider_type = TREE_TYPE (suba);
3307 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3308 return atype;
3310 else
3311 return atype;
3313 if (CONVERT_EXPR_P (*b))
3315 subb = TREE_OPERAND (*b, 0);
3316 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3317 return atype;
3319 else
3320 return atype;
3322 *a = suba;
3323 *b = subb;
3324 return wider_type;
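/* Editor's illustration (assumed types): if *a is (unsigned int) x and
   *b is (unsigned int) y, with x and y of a wider type such as
   'unsigned long' of equal precision, A and B are replaced by x and y
   and that wider type is returned; otherwise the original type of *a
   is kept.  */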
3327 /* Determines the expression by which USE is expressed from induction variable
3328 CAND at statement AT in LOOP. The expression is stored in a decomposed
3329 form into AFF. Returns false if USE cannot be expressed using CAND. */
3331 static bool
3332 get_computation_aff (struct loop *loop,
3333 struct iv_use *use, struct iv_cand *cand, gimple at,
3334 struct aff_tree *aff)
3336 tree ubase = use->iv->base;
3337 tree ustep = use->iv->step;
3338 tree cbase = cand->iv->base;
3339 tree cstep = cand->iv->step, cstep_common;
3340 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3341 tree common_type, var;
3342 tree uutype;
3343 aff_tree cbase_aff, var_aff;
3344 widest_int rat;
3346 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3348 /* We do not have enough precision to express the values of USE. */
3349 return false;
3352 var = var_at_stmt (loop, cand, at);
3353 uutype = unsigned_type_for (utype);
3355 /* If the conversion is not noop, perform it. */
3356 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3358 cstep = fold_convert (uutype, cstep);
3359 cbase = fold_convert (uutype, cbase);
3360 var = fold_convert (uutype, var);
3363 if (!constant_multiple_of (ustep, cstep, &rat))
3364 return false;
3366 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3367 type, we achieve better folding by computing their difference in this
3368 wider type, and cast the result to UUTYPE. We do not need to worry about
3369 overflows, as all the arithmetic will in the end be performed in UUTYPE
3370 anyway. */
3371 common_type = determine_common_wider_type (&ubase, &cbase);
3373 /* use = ubase - ratio * cbase + ratio * var. */
3374 tree_to_aff_combination (ubase, common_type, aff);
3375 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3376 tree_to_aff_combination (var, uutype, &var_aff);
3378 /* We need to shift the value if we are after the increment. */
3379 if (stmt_after_increment (loop, cand, at))
3381 aff_tree cstep_aff;
3383 if (common_type != uutype)
3384 cstep_common = fold_convert (common_type, cstep);
3385 else
3386 cstep_common = cstep;
3388 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3389 aff_combination_add (&cbase_aff, &cstep_aff);
3392 aff_combination_scale (&cbase_aff, -rat);
3393 aff_combination_add (aff, &cbase_aff);
3394 if (common_type != uutype)
3395 aff_combination_convert (aff, uutype);
3397 aff_combination_scale (&var_aff, rat);
3398 aff_combination_add (aff, &var_aff);
3400 return true;
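/* Editor's illustration (made-up ivs): for a use with iv {base 0,
   step 4} and a candidate with iv {base 0, step 1},
   constant_multiple_of gives rat = 4 and the affine form of

     use = ubase - rat * cbase + rat * var = 4 * var

   is produced; with cbase = 8 it would instead be 4 * var - 32.  */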
3403 /* Return the type of USE. */
3405 static tree
3406 get_use_type (struct iv_use *use)
3408 tree base_type = TREE_TYPE (use->iv->base);
3409 tree type;
3411 if (use->type == USE_ADDRESS)
3413 /* The base_type may be a void pointer. Create a pointer type based on
3414 the mem_ref instead. */
3415 type = build_pointer_type (TREE_TYPE (*use->op_p));
3416 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3417 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3419 else
3420 type = base_type;
3422 return type;
3425 /* Determines the expression by which USE is expressed from induction variable
3426 CAND at statement AT in LOOP. The computation is unshared. */
3428 static tree
3429 get_computation_at (struct loop *loop,
3430 struct iv_use *use, struct iv_cand *cand, gimple at)
3432 aff_tree aff;
3433 tree type = get_use_type (use);
3435 if (!get_computation_aff (loop, use, cand, at, &aff))
3436 return NULL_TREE;
3437 unshare_aff_combination (&aff);
3438 return fold_convert (type, aff_combination_to_tree (&aff));
3441 /* Determines the expression by which USE is expressed from induction variable
3442 CAND in LOOP. The computation is unshared. */
3444 static tree
3445 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3447 return get_computation_at (loop, use, cand, use->stmt);
3450 /* Adjust the cost COST for being in loop setup rather than loop body.
3451 If we're optimizing for space, the loop setup overhead is constant;
3452 if we're optimizing for speed, amortize it over the per-iteration cost. */
3453 static unsigned
3454 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3456 if (cost == INFTY)
3457 return cost;
3458 else if (optimize_loop_for_speed_p (data->current_loop))
3459 return cost / avg_loop_niter (data->current_loop);
3460 else
3461 return cost;
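/* Editor's illustration (made-up numbers): a setup cost of 20 in a loop
   with avg_loop_niter == 10 is accounted as 2 per iteration when
   optimizing for speed, but stays 20 when optimizing for size, since
   setup code contributes its full size exactly once.  */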
3464 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3465 validity for a memory reference accessing memory of mode MODE in
3466 address space AS. */
3469 bool
3470 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3471 addr_space_t as)
3473 #define MAX_RATIO 128
3474 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3475 static vec<sbitmap> valid_mult_list;
3476 sbitmap valid_mult;
3478 if (data_index >= valid_mult_list.length ())
3479 valid_mult_list.safe_grow_cleared (data_index + 1);
3481 valid_mult = valid_mult_list[data_index];
3482 if (!valid_mult)
3484 machine_mode address_mode = targetm.addr_space.address_mode (as);
3485 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3486 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3487 rtx addr, scaled;
3488 HOST_WIDE_INT i;
3490 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3491 bitmap_clear (valid_mult);
3492 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3493 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3494 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3496 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3497 if (memory_address_addr_space_p (mode, addr, as)
3498 || memory_address_addr_space_p (mode, scaled, as))
3499 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3502 if (dump_file && (dump_flags & TDF_DETAILS))
3504 fprintf (dump_file, " allowed multipliers:");
3505 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3506 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3507 fprintf (dump_file, " %d", (int) i);
3508 fprintf (dump_file, "\n");
3509 fprintf (dump_file, "\n");
3512 valid_mult_list[data_index] = valid_mult;
3515 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3516 return false;
3518 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
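/* Editor's illustration (typical x86-like target, assumed): the probe
   above would leave the bits for ratios such as 1, 2, 4 and 8 set in
   valid_mult, so

     multiplier_allowed_in_address_p (4, SImode, as)

   returns true, while a ratio of 3, or of 200 (beyond MAX_RATIO),
   does not.  */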
3521 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3522 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3523 variable is omitted. Compute the cost for a memory reference that accesses
3524 a memory location of mode MEM_MODE in address space AS.
3526 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3527 size of MEM_MODE / RATIO) is available. To make this determination, we
3528 look at the size of the increment to be made, which is given in CSTEP.
3529 CSTEP may be zero if the step is unknown.
3530 STMT_AFTER_INC is true iff the statement we're looking at is after the
3531 increment of the original biv.
3533 TODO -- there must be some better way. This all is quite crude. */
3535 enum ainc_type
3537 AINC_PRE_INC, /* Pre increment. */
3538 AINC_PRE_DEC, /* Pre decrement. */
3539 AINC_POST_INC, /* Post increment. */
3540 AINC_POST_DEC, /* Post decrement. */
3541 AINC_NONE /* Also the number of auto increment types. */
3544 typedef struct address_cost_data_s
3546 HOST_WIDE_INT min_offset, max_offset;
3547 unsigned costs[2][2][2][2];
3548 unsigned ainc_costs[AINC_NONE];
3549 } *address_cost_data;
3552 static comp_cost
3553 get_address_cost (bool symbol_present, bool var_present,
3554 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3555 HOST_WIDE_INT cstep, machine_mode mem_mode,
3556 addr_space_t as, bool speed,
3557 bool stmt_after_inc, bool *may_autoinc)
3559 machine_mode address_mode = targetm.addr_space.address_mode (as);
3560 static vec<address_cost_data> address_cost_data_list;
3561 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3562 address_cost_data data;
3563 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3564 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3565 unsigned cost, acost, complexity;
3566 enum ainc_type autoinc_type;
3567 bool offset_p, ratio_p, autoinc;
3568 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3569 unsigned HOST_WIDE_INT mask;
3570 unsigned bits;
3572 if (data_index >= address_cost_data_list.length ())
3573 address_cost_data_list.safe_grow_cleared (data_index + 1);
3575 data = address_cost_data_list[data_index];
3576 if (!data)
3578 HOST_WIDE_INT i;
3579 HOST_WIDE_INT rat, off = 0;
3580 int old_cse_not_expected, width;
3581 unsigned sym_p, var_p, off_p, rat_p, add_c;
3582 rtx_insn *seq;
3583 rtx addr, base;
3584 rtx reg0, reg1;
3586 data = (address_cost_data) xcalloc (1, sizeof (*data));
3588 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3590 width = GET_MODE_BITSIZE (address_mode) - 1;
3591 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3592 width = HOST_BITS_PER_WIDE_INT - 1;
3593 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3595 for (i = width; i >= 0; i--)
3597 off = -((unsigned HOST_WIDE_INT) 1 << i);
3598 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3599 if (memory_address_addr_space_p (mem_mode, addr, as))
3600 break;
3602 data->min_offset = (i == -1? 0 : off);
3604 for (i = width; i >= 0; i--)
3606 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3607 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3608 if (memory_address_addr_space_p (mem_mode, addr, as))
3609 break;
3610 /* For some strict-alignment targets, the offset must be naturally
3611 aligned. Try an aligned offset if mem_mode is not QImode. */
3612 off = mem_mode != QImode
3613 ? ((unsigned HOST_WIDE_INT) 1 << i)
3614 - GET_MODE_SIZE (mem_mode)
3615 : 0;
3616 if (off > 0)
3618 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3619 if (memory_address_addr_space_p (mem_mode, addr, as))
3620 break;
3623 if (i == -1)
3624 off = 0;
3625 data->max_offset = off;
3627 if (dump_file && (dump_flags & TDF_DETAILS))
3629 fprintf (dump_file, "get_address_cost:\n");
3630 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3631 GET_MODE_NAME (mem_mode),
3632 data->min_offset);
3633 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634 GET_MODE_NAME (mem_mode),
3635 data->max_offset);
3638 rat = 1;
3639 for (i = 2; i <= MAX_RATIO; i++)
3640 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3642 rat = i;
3643 break;
3646 /* Compute the cost of various addressing modes. */
3647 acost = 0;
3648 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3649 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3651 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3652 || USE_STORE_PRE_DECREMENT (mem_mode))
3654 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3655 has_predec[mem_mode]
3656 = memory_address_addr_space_p (mem_mode, addr, as);
3658 if (has_predec[mem_mode])
3659 data->ainc_costs[AINC_PRE_DEC]
3660 = address_cost (addr, mem_mode, as, speed);
3662 if (USE_LOAD_POST_DECREMENT (mem_mode)
3663 || USE_STORE_POST_DECREMENT (mem_mode))
3665 addr = gen_rtx_POST_DEC (address_mode, reg0);
3666 has_postdec[mem_mode]
3667 = memory_address_addr_space_p (mem_mode, addr, as);
3669 if (has_postdec[mem_mode])
3670 data->ainc_costs[AINC_POST_DEC]
3671 = address_cost (addr, mem_mode, as, speed);
3673 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3674 || USE_STORE_PRE_INCREMENT (mem_mode))
3676 addr = gen_rtx_PRE_INC (address_mode, reg0);
3677 has_preinc[mem_mode]
3678 = memory_address_addr_space_p (mem_mode, addr, as);
3680 if (has_preinc[mem_mode])
3681 data->ainc_costs[AINC_PRE_INC]
3682 = address_cost (addr, mem_mode, as, speed);
3684 if (USE_LOAD_POST_INCREMENT (mem_mode)
3685 || USE_STORE_POST_INCREMENT (mem_mode))
3687 addr = gen_rtx_POST_INC (address_mode, reg0);
3688 has_postinc[mem_mode]
3689 = memory_address_addr_space_p (mem_mode, addr, as);
3691 if (has_postinc[mem_mode])
3692 data->ainc_costs[AINC_POST_INC]
3693 = address_cost (addr, mem_mode, as, speed);
3695 for (i = 0; i < 16; i++)
3697 sym_p = i & 1;
3698 var_p = (i >> 1) & 1;
3699 off_p = (i >> 2) & 1;
3700 rat_p = (i >> 3) & 1;
3702 addr = reg0;
3703 if (rat_p)
3704 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3705 gen_int_mode (rat, address_mode));
3707 if (var_p)
3708 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3710 if (sym_p)
3712 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3713 /* ??? We can run into trouble with some backends by presenting
3714 it with symbols which haven't been properly passed through
3715 targetm.encode_section_info. By setting the local bit, we
3716 enhance the probability of things working. */
3717 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3719 if (off_p)
3720 base = gen_rtx_fmt_e (CONST, address_mode,
3721 gen_rtx_fmt_ee
3722 (PLUS, address_mode, base,
3723 gen_int_mode (off, address_mode)));
3725 else if (off_p)
3726 base = gen_int_mode (off, address_mode);
3727 else
3728 base = NULL_RTX;
3730 if (base)
3731 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3733 start_sequence ();
3734 /* To avoid splitting addressing modes, pretend that no cse will
3735 follow. */
3736 old_cse_not_expected = cse_not_expected;
3737 cse_not_expected = true;
3738 addr = memory_address_addr_space (mem_mode, addr, as);
3739 cse_not_expected = old_cse_not_expected;
3740 seq = get_insns ();
3741 end_sequence ();
3743 acost = seq_cost (seq, speed);
3744 acost += address_cost (addr, mem_mode, as, speed);
3746 if (!acost)
3747 acost = 1;
3748 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3751 /* On some targets, it is quite expensive to load a symbol into a register,
3752 which makes addresses that contain symbols look much more expensive.
3753 However, the symbol will have to be loaded in any case before the
3754 loop (and quite likely we have it in register already), so it does not
3755 make much sense to penalize them too heavily. So make some final
3756 tweaks for the SYMBOL_PRESENT modes:
3758 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3759 var is cheaper, use this mode with small penalty.
3760 If VAR_PRESENT is true, try whether the mode with
3761 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3762 if this is the case, use it. */
3763 add_c = add_cost (speed, address_mode);
3764 for (i = 0; i < 8; i++)
3766 var_p = i & 1;
3767 off_p = (i >> 1) & 1;
3768 rat_p = (i >> 2) & 1;
3770 acost = data->costs[0][1][off_p][rat_p] + 1;
3771 if (var_p)
3772 acost += add_c;
3774 if (acost < data->costs[1][var_p][off_p][rat_p])
3775 data->costs[1][var_p][off_p][rat_p] = acost;
3778 if (dump_file && (dump_flags & TDF_DETAILS))
3780 fprintf (dump_file, "Address costs:\n");
3782 for (i = 0; i < 16; i++)
3784 sym_p = i & 1;
3785 var_p = (i >> 1) & 1;
3786 off_p = (i >> 2) & 1;
3787 rat_p = (i >> 3) & 1;
3789 fprintf (dump_file, " ");
3790 if (sym_p)
3791 fprintf (dump_file, "sym + ");
3792 if (var_p)
3793 fprintf (dump_file, "var + ");
3794 if (off_p)
3795 fprintf (dump_file, "cst + ");
3796 if (rat_p)
3797 fprintf (dump_file, "rat * ");
3799 acost = data->costs[sym_p][var_p][off_p][rat_p];
3800 fprintf (dump_file, "index costs %d\n", acost);
3802 if (has_predec[mem_mode] || has_postdec[mem_mode]
3803 || has_preinc[mem_mode] || has_postinc[mem_mode])
3804 fprintf (dump_file, " May include autoinc/dec\n");
3805 fprintf (dump_file, "\n");
3808 address_cost_data_list[data_index] = data;
3811 bits = GET_MODE_BITSIZE (address_mode);
3812 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3813 offset &= mask;
3814 if ((offset >> (bits - 1) & 1))
3815 offset |= ~mask;
3816 s_offset = offset;
3818 autoinc = false;
3819 autoinc_type = AINC_NONE;
3820 msize = GET_MODE_SIZE (mem_mode);
3821 autoinc_offset = offset;
3822 if (stmt_after_inc)
3823 autoinc_offset += ratio * cstep;
3824 if (symbol_present || var_present || ratio != 1)
3825 autoinc = false;
3826 else
3828 if (has_postinc[mem_mode] && autoinc_offset == 0
3829 && msize == cstep)
3830 autoinc_type = AINC_POST_INC;
3831 else if (has_postdec[mem_mode] && autoinc_offset == 0
3832 && msize == -cstep)
3833 autoinc_type = AINC_POST_DEC;
3834 else if (has_preinc[mem_mode] && autoinc_offset == msize
3835 && msize == cstep)
3836 autoinc_type = AINC_PRE_INC;
3837 else if (has_predec[mem_mode] && autoinc_offset == -msize
3838 && msize == -cstep)
3839 autoinc_type = AINC_PRE_DEC;
3841 if (autoinc_type != AINC_NONE)
3842 autoinc = true;
3845 cost = 0;
3846 offset_p = (s_offset != 0
3847 && data->min_offset <= s_offset
3848 && s_offset <= data->max_offset);
3849 ratio_p = (ratio != 1
3850 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3852 if (ratio != 1 && !ratio_p)
3853 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3855 if (s_offset && !offset_p && !symbol_present)
3856 cost += add_cost (speed, address_mode);
3858 if (may_autoinc)
3859 *may_autoinc = autoinc;
3860 if (autoinc)
3861 acost = data->ainc_costs[autoinc_type];
3862 else
3863 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3864 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3865 return new_cost (cost + acost, complexity);
3868 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3869 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3870 calculating the operands of EXPR. Returns true if successful, and returns
3871 the cost in COST. */
3873 static bool
3874 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3875 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3877 comp_cost res;
3878 tree op1 = TREE_OPERAND (expr, 1);
3879 tree cst = TREE_OPERAND (mult, 1);
3880 tree multop = TREE_OPERAND (mult, 0);
3881 int m = exact_log2 (int_cst_value (cst));
3882 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3883 int as_cost, sa_cost;
3884 bool mult_in_op1;
3886 if (!(m >= 0 && m < maxm))
3887 return false;
3889 mult_in_op1 = operand_equal_p (op1, mult, 0);
3891 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3893 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3894 use that in preference to a shift insn followed by an add insn. */
3895 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3896 ? shiftadd_cost (speed, mode, m)
3897 : (mult_in_op1
3898 ? shiftsub1_cost (speed, mode, m)
3899 : shiftsub0_cost (speed, mode, m)));
3901 res = new_cost (MIN (as_cost, sa_cost), 0);
3902 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3904 STRIP_NOPS (multop);
3905 if (!is_gimple_val (multop))
3906 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3908 *cost = res;
3909 return true;
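/* Editor's illustration (assumed costs): for expr = a + b * 4 the MULT
   operand gives m = exact_log2 (4) = 2, and the function compares

     as_cost = add_cost + shift_cost (m)   -- (b << 2) + a, two insns
     sa_cost = shiftadd_cost (m)           -- one shift-and-add insn

   picking the cheaper; on targets with lea-like instructions sa_cost
   typically wins.  */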
3912 /* Estimates cost of forcing expression EXPR into a variable. */
3914 static comp_cost
3915 force_expr_to_var_cost (tree expr, bool speed)
3917 static bool costs_initialized = false;
3918 static unsigned integer_cost [2];
3919 static unsigned symbol_cost [2];
3920 static unsigned address_cost [2];
3921 tree op0, op1;
3922 comp_cost cost0, cost1, cost;
3923 machine_mode mode;
3925 if (!costs_initialized)
3927 tree type = build_pointer_type (integer_type_node);
3928 tree var, addr;
3929 rtx x;
3930 int i;
3932 var = create_tmp_var_raw (integer_type_node, "test_var");
3933 TREE_STATIC (var) = 1;
3934 x = produce_memory_decl_rtl (var, NULL);
3935 SET_DECL_RTL (var, x);
3937 addr = build1 (ADDR_EXPR, type, var);
3940 for (i = 0; i < 2; i++)
3942 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3943 2000), i);
3945 symbol_cost[i] = computation_cost (addr, i) + 1;
3947 address_cost[i]
3948 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3951 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3952 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3953 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3954 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3955 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3956 fprintf (dump_file, "\n");
3960 costs_initialized = true;
3963 STRIP_NOPS (expr);
3965 if (SSA_VAR_P (expr))
3966 return no_cost;
3968 if (is_gimple_min_invariant (expr))
3970 if (TREE_CODE (expr) == INTEGER_CST)
3971 return new_cost (integer_cost [speed], 0);
3973 if (TREE_CODE (expr) == ADDR_EXPR)
3975 tree obj = TREE_OPERAND (expr, 0);
3977 if (TREE_CODE (obj) == VAR_DECL
3978 || TREE_CODE (obj) == PARM_DECL
3979 || TREE_CODE (obj) == RESULT_DECL)
3980 return new_cost (symbol_cost [speed], 0);
3983 return new_cost (address_cost [speed], 0);
3986 switch (TREE_CODE (expr))
3988 case POINTER_PLUS_EXPR:
3989 case PLUS_EXPR:
3990 case MINUS_EXPR:
3991 case MULT_EXPR:
3992 op0 = TREE_OPERAND (expr, 0);
3993 op1 = TREE_OPERAND (expr, 1);
3994 STRIP_NOPS (op0);
3995 STRIP_NOPS (op1);
3996 break;
3998 CASE_CONVERT:
3999 case NEGATE_EXPR:
4000 op0 = TREE_OPERAND (expr, 0);
4001 STRIP_NOPS (op0);
4002 op1 = NULL_TREE;
4003 break;
4005 default:
4006 /* Just an arbitrary value, FIXME. */
4007 return new_cost (target_spill_cost[speed], 0);
4010 if (op0 == NULL_TREE
4011 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4012 cost0 = no_cost;
4013 else
4014 cost0 = force_expr_to_var_cost (op0, speed);
4016 if (op1 == NULL_TREE
4017 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4018 cost1 = no_cost;
4019 else
4020 cost1 = force_expr_to_var_cost (op1, speed);
4022 mode = TYPE_MODE (TREE_TYPE (expr));
4023 switch (TREE_CODE (expr))
4025 case POINTER_PLUS_EXPR:
4026 case PLUS_EXPR:
4027 case MINUS_EXPR:
4028 case NEGATE_EXPR:
4029 cost = new_cost (add_cost (speed, mode), 0);
4030 if (TREE_CODE (expr) != NEGATE_EXPR)
4032 tree mult = NULL_TREE;
4033 comp_cost sa_cost;
4034 if (TREE_CODE (op1) == MULT_EXPR)
4035 mult = op1;
4036 else if (TREE_CODE (op0) == MULT_EXPR)
4037 mult = op0;
4039 if (mult != NULL_TREE
4040 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4041 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4042 speed, &sa_cost))
4043 return sa_cost;
4045 break;
4047 CASE_CONVERT:
4049 tree inner_mode, outer_mode;
4050 outer_mode = TREE_TYPE (expr);
4051 inner_mode = TREE_TYPE (op0);
4052 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4053 TYPE_MODE (inner_mode), speed), 0);
4055 break;
4057 case MULT_EXPR:
4058 if (cst_and_fits_in_hwi (op0))
4059 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4060 mode, speed), 0);
4061 else if (cst_and_fits_in_hwi (op1))
4062 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4063 mode, speed), 0);
4064 else
4065 return new_cost (target_spill_cost [speed], 0);
4066 break;
4068 default:
4069 gcc_unreachable ();
4072 cost = add_costs (cost, cost0);
4073 cost = add_costs (cost, cost1);
4075 /* Bound the cost by target_spill_cost. The parts of complicated
4076 computations often are either loop invariant or at least can
4077 be shared between several iv uses, so letting this grow without
4078 limits would not give reasonable results. */
4079 if (cost.cost > (int) target_spill_cost [speed])
4080 cost.cost = target_spill_cost [speed];
4082 return cost;
4085 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4086 invariants the computation depends on. */
4088 static comp_cost
4089 force_var_cost (struct ivopts_data *data,
4090 tree expr, bitmap *depends_on)
4092 if (depends_on)
4094 fd_ivopts_data = data;
4095 walk_tree (&expr, find_depends, depends_on, NULL);
4098 return force_expr_to_var_cost (expr, data->speed);
4101 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4102 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4103 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4104 invariants the computation depends on. */
4106 static comp_cost
4107 split_address_cost (struct ivopts_data *data,
4108 tree addr, bool *symbol_present, bool *var_present,
4109 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4111 tree core;
4112 HOST_WIDE_INT bitsize;
4113 HOST_WIDE_INT bitpos;
4114 tree toffset;
4115 machine_mode mode;
4116 int unsignedp, volatilep;
4118 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4119 &unsignedp, &volatilep, false);
4121 if (toffset != 0
4122 || bitpos % BITS_PER_UNIT != 0
4123 || TREE_CODE (core) != VAR_DECL)
4125 *symbol_present = false;
4126 *var_present = true;
4127 fd_ivopts_data = data;
4128 walk_tree (&addr, find_depends, depends_on, NULL);
4129 return new_cost (target_spill_cost[data->speed], 0);
4132 *offset += bitpos / BITS_PER_UNIT;
4133 if (TREE_STATIC (core)
4134 || DECL_EXTERNAL (core))
4136 *symbol_present = true;
4137 *var_present = false;
4138 return no_cost;
4141 *symbol_present = false;
4142 *var_present = true;
4143 return no_cost;
4146 /* Estimates cost of expressing difference of addresses E1 - E2 as
4147 var + symbol + offset. The value of offset is added to OFFSET,
4148 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4149 part is missing. DEPENDS_ON is a set of the invariants the computation
4150 depends on. */
4152 static comp_cost
4153 ptr_difference_cost (struct ivopts_data *data,
4154 tree e1, tree e2, bool *symbol_present, bool *var_present,
4155 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4157 HOST_WIDE_INT diff = 0;
4158 aff_tree aff_e1, aff_e2;
4159 tree type;
4161 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4163 if (ptr_difference_const (e1, e2, &diff))
4165 *offset += diff;
4166 *symbol_present = false;
4167 *var_present = false;
4168 return no_cost;
4171 if (integer_zerop (e2))
4172 return split_address_cost (data, TREE_OPERAND (e1, 0),
4173 symbol_present, var_present, offset, depends_on);
4175 *symbol_present = false;
4176 *var_present = true;
4178 type = signed_type_for (TREE_TYPE (e1));
4179 tree_to_aff_combination (e1, type, &aff_e1);
4180 tree_to_aff_combination (e2, type, &aff_e2);
4181 aff_combination_scale (&aff_e2, -1);
4182 aff_combination_add (&aff_e1, &aff_e2);
4184 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
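/* Examples: a difference such as &a[i + 4] - &a[i] that
   ptr_difference_const can fold only adjusts *offset and is free;
   a difference of unrelated addresses does not fold, so the affine
   form E1 - E2 is built and priced by force_var_cost.  */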
4187 /* Estimates cost of expressing difference E1 - E2 as
4188 var + symbol + offset. The value of offset is added to OFFSET,
4189 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4190 part is missing. DEPENDS_ON is a set of the invariants the computation
4191 depends on. */
4193 static comp_cost
4194 difference_cost (struct ivopts_data *data,
4195 tree e1, tree e2, bool *symbol_present, bool *var_present,
4196 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4198 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4199 unsigned HOST_WIDE_INT off1, off2;
4200 aff_tree aff_e1, aff_e2;
4201 tree type;
4203 e1 = strip_offset (e1, &off1);
4204 e2 = strip_offset (e2, &off2);
4205 *offset += off1 - off2;
4207 STRIP_NOPS (e1);
4208 STRIP_NOPS (e2);
4210 if (TREE_CODE (e1) == ADDR_EXPR)
4211 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4212 offset, depends_on);
4213 *symbol_present = false;
4215 if (operand_equal_p (e1, e2, 0))
4217 *var_present = false;
4218 return no_cost;
4221 *var_present = true;
4223 if (integer_zerop (e2))
4224 return force_var_cost (data, e1, depends_on);
4226 if (integer_zerop (e1))
4228 comp_cost cost = force_var_cost (data, e2, depends_on);
4229 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4230 return cost;
4233 type = signed_type_for (TREE_TYPE (e1));
4234 tree_to_aff_combination (e1, type, &aff_e1);
4235 tree_to_aff_combination (e2, type, &aff_e2);
4236 aff_combination_scale (&aff_e2, -1);
4237 aff_combination_add (&aff_e1, &aff_e2);
4239 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
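/* Examples: for (x + 8) - x the offsets are stripped, the remaining
   operands compare equal, and no_cost is returned with *offset
   increased by 8; for 0 - x the result is the cost of forcing x into
   a variable plus mult_by_coeff_cost (-1, ...) for the negation.  */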
4242 /* Returns true if AFF1 and AFF2 are identical. */
4244 static bool
4245 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4247 unsigned i;
4249 if (aff1->n != aff2->n)
4250 return false;
4252 for (i = 0; i < aff1->n; i++)
4254 if (aff1->elts[i].coef != aff2->elts[i].coef)
4255 return false;
4257 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4258 return false;
4260 return true;
4263 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4265 static int
4266 get_expr_id (struct ivopts_data *data, tree expr)
4268 struct iv_inv_expr_ent ent;
4269 struct iv_inv_expr_ent **slot;
4271 ent.expr = expr;
4272 ent.hash = iterative_hash_expr (expr, 0);
4273 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4274 if (*slot)
4275 return (*slot)->id;
4277 *slot = XNEW (struct iv_inv_expr_ent);
4278 (*slot)->expr = expr;
4279 (*slot)->hash = ent.hash;
4280 (*slot)->id = data->inv_expr_id++;
4281 return (*slot)->id;
4284 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
4285 requires a new compiler-generated temporary.  Returns -1 otherwise.
4286 ADDRESS_P is a flag indicating whether the expression is for address
4287 computation.  */
4289 static int
4290 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4291 tree cbase, HOST_WIDE_INT ratio,
4292 bool address_p)
4294 aff_tree ubase_aff, cbase_aff;
4295 tree expr, ub, cb;
4297 STRIP_NOPS (ubase);
4298 STRIP_NOPS (cbase);
4299 ub = ubase;
4300 cb = cbase;
4302 if ((TREE_CODE (ubase) == INTEGER_CST)
4303 && (TREE_CODE (cbase) == INTEGER_CST))
4304 return -1;
4306 /* Strips the constant part. */
4307 if (TREE_CODE (ubase) == PLUS_EXPR
4308 || TREE_CODE (ubase) == MINUS_EXPR
4309 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4311 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4312 ubase = TREE_OPERAND (ubase, 0);
4315 /* Strips the constant part. */
4316 if (TREE_CODE (cbase) == PLUS_EXPR
4317 || TREE_CODE (cbase) == MINUS_EXPR
4318 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4320 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4321 cbase = TREE_OPERAND (cbase, 0);
4324 if (address_p)
4326 if (((TREE_CODE (ubase) == SSA_NAME)
4327 || (TREE_CODE (ubase) == ADDR_EXPR
4328 && is_gimple_min_invariant (ubase)))
4329 && (TREE_CODE (cbase) == INTEGER_CST))
4330 return -1;
4332 if (((TREE_CODE (cbase) == SSA_NAME)
4333 || (TREE_CODE (cbase) == ADDR_EXPR
4334 && is_gimple_min_invariant (cbase)))
4335 && (TREE_CODE (ubase) == INTEGER_CST))
4336 return -1;
4339 if (ratio == 1)
4341 if (operand_equal_p (ubase, cbase, 0))
4342 return -1;
4344 if (TREE_CODE (ubase) == ADDR_EXPR
4345 && TREE_CODE (cbase) == ADDR_EXPR)
4347 tree usym, csym;
4349 usym = TREE_OPERAND (ubase, 0);
4350 csym = TREE_OPERAND (cbase, 0);
4351 if (TREE_CODE (usym) == ARRAY_REF)
4353 tree ind = TREE_OPERAND (usym, 1);
4354 if (TREE_CODE (ind) == INTEGER_CST
4355 && tree_fits_shwi_p (ind)
4356 && tree_to_shwi (ind) == 0)
4357 usym = TREE_OPERAND (usym, 0);
4359 if (TREE_CODE (csym) == ARRAY_REF)
4361 tree ind = TREE_OPERAND (csym, 1);
4362 if (TREE_CODE (ind) == INTEGER_CST
4363 && tree_fits_shwi_p (ind)
4364 && tree_to_shwi (ind) == 0)
4365 csym = TREE_OPERAND (csym, 0);
4367 if (operand_equal_p (usym, csym, 0))
4368 return -1;
4370 /* Now do a more complex comparison.  */
4371 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4372 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4373 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4374 return -1;
4377 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4378 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4380 aff_combination_scale (&cbase_aff, -1 * ratio);
4381 aff_combination_add (&ubase_aff, &cbase_aff);
4382 expr = aff_combination_to_tree (&ubase_aff);
4383 return get_expr_id (data, expr);
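/* For instance, with ratio == 1 and ubase == cbase (or bases differing
   only in a constant index, such as &a[3] and &a[0]), no temporary is
   needed and -1 is returned.  For unrelated invariant bases, say SSA
   names n and m, the expression n - m is entered into inv_expr_tab, so
   all uses needing the same invariant share a single id.  */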
4388 /* Determines the cost of the computation by which USE is expressed
4389 from induction variable CAND.  If ADDRESS_P is true, we just need
4390 to create an address from it, otherwise we want to get it into a
4391 register.  A set of invariants we depend on is stored in
4392 DEPENDS_ON.  AT is the statement at which the value is computed.
4393 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4394 addressing is likely.  */
4396 static comp_cost
4397 get_computation_cost_at (struct ivopts_data *data,
4398 struct iv_use *use, struct iv_cand *cand,
4399 bool address_p, bitmap *depends_on, gimple at,
4400 bool *can_autoinc,
4401 int *inv_expr_id)
4403 tree ubase = use->iv->base, ustep = use->iv->step;
4404 tree cbase, cstep;
4405 tree utype = TREE_TYPE (ubase), ctype;
4406 unsigned HOST_WIDE_INT cstepi, offset = 0;
4407 HOST_WIDE_INT ratio, aratio;
4408 bool var_present, symbol_present, stmt_is_after_inc;
4409 comp_cost cost;
4410 widest_int rat;
4411 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4412 machine_mode mem_mode = (address_p
4413 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4414 : VOIDmode);
4416 *depends_on = NULL;
4418 /* Only consider real candidates. */
4419 if (!cand->iv)
4420 return infinite_cost;
4422 cbase = cand->iv->base;
4423 cstep = cand->iv->step;
4424 ctype = TREE_TYPE (cbase);
4426 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4428 /* We do not have enough precision to express the values of USE.  */
4429 return infinite_cost;
4432 if (address_p
4433 || (use->iv->base_object
4434 && cand->iv->base_object
4435 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4436 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4438 /* Do not try to express the address of an object with a computation
4439 based on the address of a different object.  This may cause problems
4440 in RTL-level alias analysis (which does not expect this to happen,
4441 as it is invalid in C), and would be unlikely to be useful
4442 anyway.  */
4443 if (use->iv->base_object
4444 && cand->iv->base_object
4445 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4446 return infinite_cost;
4449 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4451 /* TODO -- add direct handling of this case. */
4452 goto fallback;
4455 /* CSTEPI is removed from the offset in case the statement is after
4456 the increment.  If the step is not constant, we use zero instead.
4457 This is a bit imprecise (there is the extra addition), but
4458 redundancy elimination is likely to transform the code so that
4459 it uses the value of the variable before the increment anyway,
4460 so this is not very unrealistic.  */
4461 if (cst_and_fits_in_hwi (cstep))
4462 cstepi = int_cst_value (cstep);
4463 else
4464 cstepi = 0;
4466 if (!constant_multiple_of (ustep, cstep, &rat))
4467 return infinite_cost;
4469 if (wi::fits_shwi_p (rat))
4470 ratio = rat.to_shwi ();
4471 else
4472 return infinite_cost;
4474 STRIP_NOPS (cbase);
4475 ctype = TREE_TYPE (cbase);
4477 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4479 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4480 or ratio == 1, it is better to handle this like
4482 ubase - ratio * cbase + ratio * var
4484 (this also holds in the case ratio == -1, TODO).  */
4486 if (cst_and_fits_in_hwi (cbase))
4488 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4489 cost = difference_cost (data,
4490 ubase, build_int_cst (utype, 0),
4491 &symbol_present, &var_present, &offset,
4492 depends_on);
4493 cost.cost /= avg_loop_niter (data->current_loop);
4495 else if (ratio == 1)
4497 tree real_cbase = cbase;
4499 /* Check to see if any adjustment is needed. */
4500 if (cstepi == 0 && stmt_is_after_inc)
4502 aff_tree real_cbase_aff;
4503 aff_tree cstep_aff;
4505 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4506 &real_cbase_aff);
4507 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4509 aff_combination_add (&real_cbase_aff, &cstep_aff);
4510 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4513 cost = difference_cost (data,
4514 ubase, real_cbase,
4515 &symbol_present, &var_present, &offset,
4516 depends_on);
4517 cost.cost /= avg_loop_niter (data->current_loop);
4519 else if (address_p
4520 && !POINTER_TYPE_P (ctype)
4521 && multiplier_allowed_in_address_p
4522 (ratio, mem_mode,
4523 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4525 cbase
4526 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4527 cost = difference_cost (data,
4528 ubase, cbase,
4529 &symbol_present, &var_present, &offset,
4530 depends_on);
4531 cost.cost /= avg_loop_niter (data->current_loop);
4533 else
4535 cost = force_var_cost (data, cbase, depends_on);
4536 cost = add_costs (cost,
4537 difference_cost (data,
4538 ubase, build_int_cst (utype, 0),
4539 &symbol_present, &var_present,
4540 &offset, depends_on));
4541 cost.cost /= avg_loop_niter (data->current_loop);
4542 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4545 /* The set of invariants depended on by the sub-use has already been
4546 computed for the first use in the group.  */
4547 if (use->sub_id)
4549 cost.cost = 0;
4550 if (depends_on && *depends_on)
4551 bitmap_clear (*depends_on);
4553 else if (inv_expr_id)
4555 *inv_expr_id =
4556 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4557 /* Clear depends on. */
4558 if (*inv_expr_id != -1 && depends_on && *depends_on)
4559 bitmap_clear (*depends_on);
4562 /* If we are after the increment, the value of the candidate is higher
4563 by one step of the iv.  */
4564 if (stmt_is_after_inc)
4565 offset -= ratio * cstepi;
4567 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4568 (symbol/var1/const parts may be omitted). If we are looking for an
4569 address, find the cost of addressing this. */
4570 if (address_p)
4571 return add_costs (cost,
4572 get_address_cost (symbol_present, var_present,
4573 offset, ratio, cstepi,
4574 mem_mode,
4575 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4576 speed, stmt_is_after_inc,
4577 can_autoinc));
4579 /* Otherwise estimate the costs for computing the expression. */
4580 if (!symbol_present && !var_present && !offset)
4582 if (ratio != 1)
4583 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4584 return cost;
4587 /* Symbol + offset should be compile-time computable so consider that they
4588 are added once to the variable, if present. */
4589 if (var_present && (symbol_present || offset))
4590 cost.cost += adjust_setup_cost (data,
4591 add_cost (speed, TYPE_MODE (ctype)));
4593 /* Having offset does not affect runtime cost in case it is added to
4594 symbol, but it increases complexity. */
4595 if (offset)
4596 cost.complexity++;
4598 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4600 aratio = ratio > 0 ? ratio : -ratio;
4601 if (aratio != 1)
4602 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4603 return cost;
4605 fallback:
4606 if (can_autoinc)
4607 *can_autoinc = false;
4610 /* Just get the expression, expand it and measure the cost. */
4611 tree comp = get_computation_at (data->current_loop, use, cand, at);
4613 if (!comp)
4614 return infinite_cost;
4616 if (address_p)
4617 comp = build_simple_mem_ref (comp);
4619 return new_cost (computation_cost (comp, speed), 0);
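/* A concrete example: when USE is the address a[i] in a loop over i
   and CAND is a pointer iv with base &a[0] and step sizeof (*a), we
   get ubase == cbase and ratio == 1, so the difference_cost path
   yields empty symbol/var/offset parts and the total reduces to
   get_address_cost of a plain *p access.  */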
4623 /* Determines the cost of the computation by which USE is expressed
4624 from induction variable CAND.  If ADDRESS_P is true, we just need
4625 to create an address from it, otherwise we want to get it into a
4626 register.  A set of invariants we depend on is stored in
4627 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4628 autoinc addressing is likely.  */
4630 static comp_cost
4631 get_computation_cost (struct ivopts_data *data,
4632 struct iv_use *use, struct iv_cand *cand,
4633 bool address_p, bitmap *depends_on,
4634 bool *can_autoinc, int *inv_expr_id)
4636 return get_computation_cost_at (data,
4637 use, cand, address_p, depends_on, use->stmt,
4638 can_autoinc, inv_expr_id);
4641 /* Determines cost of basing replacement of USE on CAND in a generic
4642 expression. */
4644 static bool
4645 determine_use_iv_cost_generic (struct ivopts_data *data,
4646 struct iv_use *use, struct iv_cand *cand)
4648 bitmap depends_on;
4649 comp_cost cost;
4650 int inv_expr_id = -1;
4652 /* The simple case first -- if we need to express the value of the
4653 preserved original biv, the cost is 0.  This also prevents us from
4654 counting the cost of the increment twice -- once at this use and
4655 once in the cost of the candidate.  */
4656 if (cand->pos == IP_ORIGINAL
4657 && cand->incremented_at == use->stmt)
4659 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4660 ERROR_MARK, -1);
4661 return true;
4664 cost = get_computation_cost (data, use, cand, false, &depends_on,
4665 NULL, &inv_expr_id);
4667 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4668 inv_expr_id);
4670 return !infinite_cost_p (cost);
4673 /* Determines cost of basing replacement of USE on CAND in an address. */
4675 static bool
4676 determine_use_iv_cost_address (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand)
4679 bitmap depends_on;
4680 bool can_autoinc;
4681 int inv_expr_id = -1;
4682 struct iv_use *sub_use;
4683 comp_cost sub_cost;
4684 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4685 &can_autoinc, &inv_expr_id);
4687 if (cand->ainc_use == use)
4689 if (can_autoinc)
4690 cost.cost -= cand->cost_step;
4691 /* If we generated the candidate solely for exploiting autoincrement
4692 opportunities, and it turns out it can't be used, set the cost to
4693 infinity to make sure we ignore it. */
4694 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4695 cost = infinite_cost;
4697 for (sub_use = use->next;
4698 sub_use && !infinite_cost_p (cost);
4699 sub_use = sub_use->next)
4701 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4702 &can_autoinc, &inv_expr_id);
4703 cost = add_costs (cost, sub_cost);
4706 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4707 inv_expr_id);
4709 return !infinite_cost_p (cost);
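/* For instance, when the candidate is incremented right after the
   memory access it was created for, as in

     ... = *p;
     p = p + 1;

   a target with post-increment addressing can merge the two; this is
   modelled above by subtracting the step cost when CAN_AUTOINC
   holds.  */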
4712 /* Computes value of candidate CAND at position AT in iteration NITER, and
4713 stores it to VAL. */
4715 static void
4716 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4717 aff_tree *val)
4719 aff_tree step, delta, nit;
4720 struct iv *iv = cand->iv;
4721 tree type = TREE_TYPE (iv->base);
4722 /* Compute in an unsigned type, so that the arithmetic below cannot
4723 introduce undefined overflow; for pointers use sizetype.  */
4724 tree steptype = (POINTER_TYPE_P (type)
4725 ? sizetype : unsigned_type_for (type));
4727 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4728 aff_combination_convert (&step, steptype);
4729 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4730 aff_combination_convert (&nit, steptype);
4731 aff_combination_mult (&nit, &step, &delta);
4732 if (stmt_after_increment (loop, cand, at))
4733 aff_combination_add (&delta, &step);
4735 tree_to_aff_combination (iv->base, type, val);
4736 if (!POINTER_TYPE_P (type))
4737 aff_combination_convert (val, steptype);
4738 aff_combination_add (val, &delta);
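/* In other words, for a candidate with base B and step S this computes
   B + S * NITER, or B + S * (NITER + 1) when AT is after the increment
   of the candidate.  */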
4741 /* Returns period of induction variable iv. */
4743 static tree
4744 iv_period (struct iv *iv)
4746 tree step = iv->step, period, type;
4747 tree pow2div;
4749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4751 type = unsigned_type_for (TREE_TYPE (step));
4752 /* The period of the iv is lcm (step, type_range)/step - 1,
4753 i.e., N*type_range/step - 1.  Since the type range is a power
4754 of two, N == step >> num_of_ending_zeros_binary (step),
4755 so the final result is
4757 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4760 pow2div = num_ending_zeros (step);
4762 period = build_low_bits_mask (type,
4763 (TYPE_PRECISION (type)
4764 - tree_to_uhwi (pow2div)));
4766 return period;
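/* Example: for a 32-bit iv with step 4, num_ending_zeros is 2 and the
   period is the mask of the low 30 bits, 0x3fffffff -- the iv takes
   2^30 distinct values before wrapping back to its initial one.  */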
4769 /* Returns the comparison operator used when eliminating the iv USE. */
4771 static enum tree_code
4772 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4774 struct loop *loop = data->current_loop;
4775 basic_block ex_bb;
4776 edge exit;
4778 ex_bb = gimple_bb (use->stmt);
4779 exit = EDGE_SUCC (ex_bb, 0);
4780 if (flow_bb_inside_loop_p (loop, exit->dest))
4781 exit = EDGE_SUCC (ex_bb, 1);
4783 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4786 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4787 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4788 calculation is performed in a non-wrapping type.
4790 TODO: More generally, we could test for the situation that
4791 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4792 This would require knowing the sign of OFFSET. */
4794 static bool
4795 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4797 enum tree_code code;
4798 tree e1, e2;
4799 aff_tree aff_e1, aff_e2, aff_offset;
4801 if (!nowrap_type_p (TREE_TYPE (base)))
4802 return false;
4804 base = expand_simple_operations (base);
4806 if (TREE_CODE (base) == SSA_NAME)
4808 gimple stmt = SSA_NAME_DEF_STMT (base);
4810 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4811 return false;
4813 code = gimple_assign_rhs_code (stmt);
4814 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4815 return false;
4817 e1 = gimple_assign_rhs1 (stmt);
4818 e2 = gimple_assign_rhs2 (stmt);
4820 else
4822 code = TREE_CODE (base);
4823 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4824 return false;
4825 e1 = TREE_OPERAND (base, 0);
4826 e2 = TREE_OPERAND (base, 1);
4829 /* Use affine expansion as deeper inspection to prove the equality. */
4830 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4831 &aff_e2, &data->name_expansion_cache);
4832 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4833 &aff_offset, &data->name_expansion_cache);
4834 aff_combination_scale (&aff_offset, -1);
4835 switch (code)
4837 case PLUS_EXPR:
4838 aff_combination_add (&aff_e2, &aff_offset);
4839 if (aff_combination_zero_p (&aff_e2))
4840 return true;
4842 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4843 &aff_e1, &data->name_expansion_cache);
4844 aff_combination_add (&aff_e1, &aff_offset);
4845 return aff_combination_zero_p (&aff_e1);
4847 case POINTER_PLUS_EXPR:
4848 aff_combination_add (&aff_e2, &aff_offset);
4849 return aff_combination_zero_p (&aff_e2);
4851 default:
4852 return false;
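/* E.g. when BASE is an SSA name defined as base0 + a and OFFSET
   expands to the same a (the situation constructed by
   iv_elimination_compare_lt below), the PLUS_EXPR case finds that one
   summand cancels against -OFFSET, so BASE - OFFSET is just base0 and
   cannot overflow.  */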
4856 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4857 comparison with CAND.  NITER describes the number of iterations of
4858 the loop.  If successful, the comparison in COMP_P is altered accordingly.
4860 We aim to handle the following situation:
4862 sometype *base, *p;
4863 int a, b, i;
4865 i = a;
4866 p = p_0 = base + a;
4870 bla (*p);
4871 p++;
4872 i++;
4874 while (i < b);
4876 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4877 We aim to optimize this to
4879 p = p_0 = base + a;
4882 bla (*p);
4883 p++;
4885 while (p < p_0 - a + b);
4887 This preserves correctness, since the pointer arithmetic does not
4888 overflow.  More precisely:
4890 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4891 overflow in computing it or the values of p.
4892 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4893 overflow. To prove this, we use the fact that p_0 = base + a. */
4895 static bool
4896 iv_elimination_compare_lt (struct ivopts_data *data,
4897 struct iv_cand *cand, enum tree_code *comp_p,
4898 struct tree_niter_desc *niter)
4900 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4901 struct aff_tree nit, tmpa, tmpb;
4902 enum tree_code comp;
4903 HOST_WIDE_INT step;
4905 /* We need to know that the candidate induction variable does not overflow.
4906 While more complex analysis may be used to prove this, for now just
4907 check that the variable appears in the original program and that it
4908 is computed in a type that guarantees no overflows. */
4909 cand_type = TREE_TYPE (cand->iv->base);
4910 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4911 return false;
4913 /* Make sure that the loop iterates until the loop bound is hit, as
4914 otherwise the calculation of BOUND could overflow, making the
4915 comparison invalid.  */
4916 if (!data->loop_single_exit_p)
4917 return false;
4919 /* We need to be able to decide whether the candidate is increasing or
4920 decreasing in order to choose the right comparison operator.  */
4921 if (!cst_and_fits_in_hwi (cand->iv->step))
4922 return false;
4923 step = int_cst_value (cand->iv->step);
4925 /* Check that the number of iterations matches the expected pattern:
4926 a + 1 > b ? 0 : b - a - 1. */
4927 mbz = niter->may_be_zero;
4928 if (TREE_CODE (mbz) == GT_EXPR)
4930 /* Handle a + 1 > b. */
4931 tree op0 = TREE_OPERAND (mbz, 0);
4932 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4934 a = TREE_OPERAND (op0, 0);
4935 b = TREE_OPERAND (mbz, 1);
4937 else
4938 return false;
4940 else if (TREE_CODE (mbz) == LT_EXPR)
4942 tree op1 = TREE_OPERAND (mbz, 1);
4944 /* Handle b < a + 1. */
4945 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4947 a = TREE_OPERAND (op1, 0);
4948 b = TREE_OPERAND (mbz, 0);
4950 else
4951 return false;
4953 else
4954 return false;
4956 /* Expected number of iterations is B - A - 1. Check that it matches
4957 the actual number, i.e., that B - A - NITER = 1. */
4958 tree_to_aff_combination (niter->niter, nit_type, &nit);
4959 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4960 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4961 aff_combination_scale (&nit, -1);
4962 aff_combination_scale (&tmpa, -1);
4963 aff_combination_add (&tmpb, &tmpa);
4964 aff_combination_add (&tmpb, &nit);
4965 if (tmpb.n != 0 || tmpb.offset != 1)
4966 return false;
4968 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4969 overflow. */
4970 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4971 cand->iv->step,
4972 fold_convert (TREE_TYPE (cand->iv->step), a));
4973 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4974 return false;
4976 /* Determine the new comparison operator. */
4977 comp = step < 0 ? GT_EXPR : LT_EXPR;
4978 if (*comp_p == NE_EXPR)
4979 *comp_p = comp;
4980 else if (*comp_p == EQ_EXPR)
4981 *comp_p = invert_tree_comparison (comp, false);
4982 else
4983 gcc_unreachable ();
4985 return true;
4988 /* Check whether it is possible to express the condition in USE by a
4989 comparison of candidate CAND.  If so, store the value compared
4990 against in BOUND, and the comparison operator in COMP.  */
4992 static bool
4993 may_eliminate_iv (struct ivopts_data *data,
4994 struct iv_use *use, struct iv_cand *cand, tree *bound,
4995 enum tree_code *comp)
4997 basic_block ex_bb;
4998 edge exit;
4999 tree period;
5000 struct loop *loop = data->current_loop;
5001 aff_tree bnd;
5002 struct tree_niter_desc *desc = NULL;
5004 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5005 return false;
5007 /* For now this works only for exits that dominate the loop latch.
5008 TODO: extend to other conditions inside the loop body.  */
5009 ex_bb = gimple_bb (use->stmt);
5010 if (use->stmt != last_stmt (ex_bb)
5011 || gimple_code (use->stmt) != GIMPLE_COND
5012 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5013 return false;
5015 exit = EDGE_SUCC (ex_bb, 0);
5016 if (flow_bb_inside_loop_p (loop, exit->dest))
5017 exit = EDGE_SUCC (ex_bb, 1);
5018 if (flow_bb_inside_loop_p (loop, exit->dest))
5019 return false;
5021 desc = niter_for_exit (data, exit);
5022 if (!desc)
5023 return false;
5025 /* Determine whether we can use the variable to test the exit condition.
5026 This is the case iff the period of the induction variable is greater
5027 than the number of iterations for which the exit condition is true. */
5028 period = iv_period (cand->iv);
5030 /* If the number of iterations is constant, compare against it directly. */
5031 if (TREE_CODE (desc->niter) == INTEGER_CST)
5033 /* See cand_value_at. */
5034 if (stmt_after_increment (loop, cand, use->stmt))
5036 if (!tree_int_cst_lt (desc->niter, period))
5037 return false;
5039 else
5041 if (tree_int_cst_lt (period, desc->niter))
5042 return false;
5046 /* If not, and if this is the only possible exit of the loop, see whether
5047 we can get a conservative estimate on the number of iterations of the
5048 entire loop and compare against that instead. */
5049 else
5051 widest_int period_value, max_niter;
5053 max_niter = desc->max;
5054 if (stmt_after_increment (loop, cand, use->stmt))
5055 max_niter += 1;
5056 period_value = wi::to_widest (period);
5057 if (wi::gtu_p (max_niter, period_value))
5059 /* See if we can take advantage of inferred loop bound information. */
5060 if (data->loop_single_exit_p)
5062 if (!max_loop_iterations (loop, &max_niter))
5063 return false;
5064 /* The loop bound is already adjusted by adding 1. */
5065 if (wi::gtu_p (max_niter, period_value))
5066 return false;
5068 else
5069 return false;
5073 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5075 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5076 aff_combination_to_tree (&bnd));
5077 *comp = iv_elimination_compare (data, use);
5079 /* It is unlikely that computing the number of iterations using division
5080 would be more profitable than keeping the original induction variable. */
5081 if (expression_expensive_p (*bound))
5082 return false;
5084 /* Sometimes it is possible to handle the situation that the number of
5085 iterations may be zero unless additional assumptions hold, by using
5086 < instead of != in the exit condition.
5088 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5089 base the exit condition on it.  However, that is often too
5090 expensive.  */
5091 if (!integer_zerop (desc->may_be_zero))
5092 return iv_elimination_compare_lt (data, cand, comp, desc);
5094 return true;
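/* For example, with an array-walking candidate p, an exit test

     i != n    becomes    p != &a[0] + n * sizeof (*a)

   where the bound is the value of p after n iterations as computed by
   cand_value_at (a sketch; the actual trees are built by the rewriting
   code).  The period check above guarantees that p reaches the bound
   for the first time exactly at the exit.  */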
5097 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5098 be copied, if it is used in the loop body and DATA->body_includes_call.  */
5100 static int
5101 parm_decl_cost (struct ivopts_data *data, tree bound)
5103 tree sbound = bound;
5104 STRIP_NOPS (sbound);
5106 if (TREE_CODE (sbound) == SSA_NAME
5107 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5108 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5109 && data->body_includes_call)
5110 return COSTS_N_INSNS (1);
5112 return 0;
5115 /* Determines cost of basing replacement of USE on CAND in a condition. */
5117 static bool
5118 determine_use_iv_cost_condition (struct ivopts_data *data,
5119 struct iv_use *use, struct iv_cand *cand)
5121 tree bound = NULL_TREE;
5122 struct iv *cmp_iv;
5123 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5124 comp_cost elim_cost, express_cost, cost, bound_cost;
5125 bool ok;
5126 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5127 tree *control_var, *bound_cst;
5128 enum tree_code comp = ERROR_MARK;
5130 /* Only consider real candidates. */
5131 if (!cand->iv)
5133 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5134 ERROR_MARK, -1);
5135 return false;
5138 /* Try iv elimination. */
5139 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5141 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5142 if (elim_cost.cost == 0)
5143 elim_cost.cost = parm_decl_cost (data, bound);
5144 else if (TREE_CODE (bound) == INTEGER_CST)
5145 elim_cost.cost = 0;
5146 /* If we replace a loop condition 'i < n' with 'p < base + n',
5147 depends_on_elim will have 'base' and 'n' set, which implies
5148 that both 'base' and 'n' will be live during the loop. More likely,
5149 'base + n' will be loop invariant, resulting in only one live value
5150 during the loop. So in that case we clear depends_on_elim and set
5151 elim_inv_expr_id instead. */
5152 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5154 elim_inv_expr_id = get_expr_id (data, bound);
5155 bitmap_clear (depends_on_elim);
5157 /* The bound is a loop invariant, so it will be computed
5158 only once.  */
5159 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5161 else
5162 elim_cost = infinite_cost;
5164 /* Try expressing the original giv. If it is compared with an invariant,
5165 note that we cannot get rid of it. */
5166 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5167 NULL, &cmp_iv);
5168 gcc_assert (ok);
5170 /* When the condition is a comparison of the candidate IV against
5171 zero, prefer this IV.
5173 TODO: The constant that we're subtracting from the cost should
5174 be target-dependent. This information should be added to the
5175 target costs for each backend. */
5176 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5177 && integer_zerop (*bound_cst)
5178 && (operand_equal_p (*control_var, cand->var_after, 0)
5179 || operand_equal_p (*control_var, cand->var_before, 0)))
5180 elim_cost.cost -= 1;
5182 express_cost = get_computation_cost (data, use, cand, false,
5183 &depends_on_express, NULL,
5184 &express_inv_expr_id);
5185 fd_ivopts_data = data;
5186 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5188 /* Count the cost of the original bound as well. */
5189 bound_cost = force_var_cost (data, *bound_cst, NULL);
5190 if (bound_cost.cost == 0)
5191 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5192 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5193 bound_cost.cost = 0;
5194 express_cost.cost += bound_cost.cost;
5196 /* Choose the better approach, preferring the eliminated IV. */
5197 if (compare_costs (elim_cost, express_cost) <= 0)
5199 cost = elim_cost;
5200 depends_on = depends_on_elim;
5201 depends_on_elim = NULL;
5202 inv_expr_id = elim_inv_expr_id;
5204 else
5206 cost = express_cost;
5207 depends_on = depends_on_express;
5208 depends_on_express = NULL;
5209 bound = NULL_TREE;
5210 comp = ERROR_MARK;
5211 inv_expr_id = express_inv_expr_id;
5214 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5216 if (depends_on_elim)
5217 BITMAP_FREE (depends_on_elim);
5218 if (depends_on_express)
5219 BITMAP_FREE (depends_on_express);
5221 return !infinite_cost_p (cost);
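/* E.g. for an exit test i < n, ELIM_COST prices replacing the test by
   a comparison of the candidate against BOUND (the bound is loop
   invariant, hence adjust_setup_cost), while EXPRESS_COST prices
   recomputing i from the candidate just for the comparison; the
   cheaper alternative decides whether BOUND and COMP are recorded
   for the rewrite.  */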
5224 /* Determines cost of basing replacement of USE on CAND. Returns false
5225 if USE cannot be based on CAND. */
5227 static bool
5228 determine_use_iv_cost (struct ivopts_data *data,
5229 struct iv_use *use, struct iv_cand *cand)
5231 switch (use->type)
5233 case USE_NONLINEAR_EXPR:
5234 return determine_use_iv_cost_generic (data, use, cand);
5236 case USE_ADDRESS:
5237 return determine_use_iv_cost_address (data, use, cand);
5239 case USE_COMPARE:
5240 return determine_use_iv_cost_condition (data, use, cand);
5242 default:
5243 gcc_unreachable ();
5247 /* Return true if get_computation_cost indicates that autoincrement is
5248 a possibility for the pair of USE and CAND, false otherwise. */
5250 static bool
5251 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5252 struct iv_cand *cand)
5254 bitmap depends_on;
5255 bool can_autoinc;
5256 comp_cost cost;
5258 if (use->type != USE_ADDRESS)
5259 return false;
5261 cost = get_computation_cost (data, use, cand, true, &depends_on,
5262 &can_autoinc, NULL);
5264 BITMAP_FREE (depends_on);
5266 return !infinite_cost_p (cost) && can_autoinc;
5269 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5270 use that allows autoincrement, and set their AINC_USE if possible. */
5272 static void
5273 set_autoinc_for_original_candidates (struct ivopts_data *data)
5275 unsigned i, j;
5277 for (i = 0; i < n_iv_cands (data); i++)
5279 struct iv_cand *cand = iv_cand (data, i);
5280 struct iv_use *closest_before = NULL;
5281 struct iv_use *closest_after = NULL;
5282 if (cand->pos != IP_ORIGINAL)
5283 continue;
5285 for (j = 0; j < n_iv_uses (data); j++)
5287 struct iv_use *use = iv_use (data, j);
5288 unsigned uid = gimple_uid (use->stmt);
5290 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5291 continue;
5293 if (uid < gimple_uid (cand->incremented_at)
5294 && (closest_before == NULL
5295 || uid > gimple_uid (closest_before->stmt)))
5296 closest_before = use;
5298 if (uid > gimple_uid (cand->incremented_at)
5299 && (closest_after == NULL
5300 || uid < gimple_uid (closest_after->stmt)))
5301 closest_after = use;
5304 if (closest_before != NULL
5305 && autoinc_possible_for_pair (data, closest_before, cand))
5306 cand->ainc_use = closest_before;
5307 else if (closest_after != NULL
5308 && autoinc_possible_for_pair (data, closest_after, cand))
5309 cand->ainc_use = closest_after;
5313 /* Finds the candidates for the induction variables. */
5315 static void
5316 find_iv_candidates (struct ivopts_data *data)
5318 /* Add commonly used ivs. */
5319 add_standard_iv_candidates (data);
5321 /* Add old induction variables. */
5322 add_old_ivs_candidates (data);
5324 /* Add induction variables derived from uses. */
5325 add_derived_ivs_candidates (data);
5327 set_autoinc_for_original_candidates (data);
5329 /* Record the important candidates. */
5330 record_important_candidates (data);
5333 /* Determines costs of basing the use of the iv on an iv candidate. */
5335 static void
5336 determine_use_iv_costs (struct ivopts_data *data)
5338 unsigned i, j;
5339 struct iv_use *use;
5340 struct iv_cand *cand;
5341 bitmap to_clear = BITMAP_ALLOC (NULL);
5343 alloc_use_cost_map (data);
5345 for (i = 0; i < n_iv_uses (data); i++)
5347 use = iv_use (data, i);
5349 if (data->consider_all_candidates)
5351 for (j = 0; j < n_iv_cands (data); j++)
5353 cand = iv_cand (data, j);
5354 determine_use_iv_cost (data, use, cand);
5357 else
5359 bitmap_iterator bi;
5361 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5363 cand = iv_cand (data, j);
5364 if (!determine_use_iv_cost (data, use, cand))
5365 bitmap_set_bit (to_clear, j);
5368 /* Remove the candidates for which the cost is infinite from
5369 the list of related candidates.  */
5370 bitmap_and_compl_into (use->related_cands, to_clear);
5371 bitmap_clear (to_clear);
5375 BITMAP_FREE (to_clear);
5377 if (dump_file && (dump_flags & TDF_DETAILS))
5379 fprintf (dump_file, "Use-candidate costs:\n");
5381 for (i = 0; i < n_iv_uses (data); i++)
5383 use = iv_use (data, i);
5385 fprintf (dump_file, "Use %d:\n", i);
5386 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5387 for (j = 0; j < use->n_map_members; j++)
5389 if (!use->cost_map[j].cand
5390 || infinite_cost_p (use->cost_map[j].cost))
5391 continue;
5393 fprintf (dump_file, " %d\t%d\t%d\t",
5394 use->cost_map[j].cand->id,
5395 use->cost_map[j].cost.cost,
5396 use->cost_map[j].cost.complexity);
5397 if (use->cost_map[j].depends_on)
5398 bitmap_print (dump_file,
5399 use->cost_map[j].depends_on, "","");
5400 if (use->cost_map[j].inv_expr_id != -1)
5401 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5402 fprintf (dump_file, "\n");
5405 fprintf (dump_file, "\n");
5407 fprintf (dump_file, "\n");
5411 /* Determines cost of the candidate CAND. */
5413 static void
5414 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5416 comp_cost cost_base;
5417 unsigned cost, cost_step;
5418 tree base;
5420 if (!cand->iv)
5422 cand->cost = 0;
5423 return;
5426 /* There are two costs associated with the candidate -- its increment
5427 and its initialization.  The second is almost negligible for any loop
5428 that rolls enough, so we give it only very little weight.  */
5430 base = cand->iv->base;
5431 cost_base = force_var_cost (data, base, NULL);
5432 /* It is exceptional for the iv register to happen to be initialized
5433 with the proper value at no cost.  In general, there will at least
5434 be a regcopy or a const set.  */
5435 if (cost_base.cost == 0)
5436 cost_base.cost = COSTS_N_INSNS (1);
5437 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5439 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5441 /* Prefer the original ivs unless we may gain something by replacing
5442 them; this makes debugging simpler.  It is therefore not relevant for
5443 artificial ivs created by other optimization passes.  */
5444 if (cand->pos != IP_ORIGINAL
5445 || !SSA_NAME_VAR (cand->var_before)
5446 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5447 cost++;
5449 /* Prefer not to insert statements into the latch unless there are
5450 some already (so that we do not create unnecessary jumps).  */
5451 if (cand->pos == IP_END
5452 && empty_block_p (ip_end_pos (data->current_loop)))
5453 cost++;
5455 cand->cost = cost;
5456 cand->cost_step = cost_step;
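/* Illustration: a candidate with a constant base costs at least
   COSTS_N_INSNS (1) to set up, but the setup executes only once, so
   adjust_setup_cost scales it down for loops that roll enough; the
   recurring part is the add_cost of the increment, plus the small
   penalties above biasing the choice towards original ivs and away
   from filling an empty latch.  */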
5459 /* Determines costs of computation of the candidates. */
5461 static void
5462 determine_iv_costs (struct ivopts_data *data)
5464 unsigned i;
5466 if (dump_file && (dump_flags & TDF_DETAILS))
5468 fprintf (dump_file, "Candidate costs:\n");
5469 fprintf (dump_file, " cand\tcost\n");
5472 for (i = 0; i < n_iv_cands (data); i++)
5474 struct iv_cand *cand = iv_cand (data, i);
5476 determine_iv_cost (data, cand);
5478 if (dump_file && (dump_flags & TDF_DETAILS))
5479 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5482 if (dump_file && (dump_flags & TDF_DETAILS))
5483 fprintf (dump_file, "\n");
5486 /* Calculates cost for having SIZE induction variables. */
5488 static unsigned
5489 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5491 /* We add size to the cost, so that we prefer eliminating ivs
5492 if possible. */
5493 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5494 data->body_includes_call);
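/* E.g. a set of 3 ivs costs 3 plus the register-pressure estimate for
   3 registers on top of DATA->regs_used; once the total approaches the
   number of available registers the estimate starts charging spills,
   which is what makes the search prefer small sets of ivs.  */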
5497 /* For each size of the induction variable set determine the penalty. */
5499 static void
5500 determine_set_costs (struct ivopts_data *data)
5502 unsigned j, n;
5503 gphi *phi;
5504 gphi_iterator psi;
5505 tree op;
5506 struct loop *loop = data->current_loop;
5507 bitmap_iterator bi;
5509 if (dump_file && (dump_flags & TDF_DETAILS))
5511 fprintf (dump_file, "Global costs:\n");
5512 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5513 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5514 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5515 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5518 n = 0;
5519 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5521 phi = psi.phi ();
5522 op = PHI_RESULT (phi);
5524 if (virtual_operand_p (op))
5525 continue;
5527 if (get_iv (data, op))
5528 continue;
5530 n++;
5533 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5535 struct version_info *info = ver_info (data, j);
5537 if (info->inv_id && info->has_nonlin_use)
5538 n++;
5541 data->regs_used = n;
5542 if (dump_file && (dump_flags & TDF_DETAILS))
5543 fprintf (dump_file, " regs_used %d\n", n);
5545 if (dump_file && (dump_flags & TDF_DETAILS))
5547 fprintf (dump_file, " cost for size:\n");
5548 fprintf (dump_file, " ivs\tcost\n");
5549 for (j = 0; j <= 2 * target_avail_regs; j++)
5550 fprintf (dump_file, " %d\t%d\n", j,
5551 ivopts_global_cost_for_size (data, j));
5552 fprintf (dump_file, "\n");
5556 /* Returns true if A is a cheaper cost pair than B. */
5558 static bool
5559 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5561 int cmp;
5563 if (!a)
5564 return false;
5566 if (!b)
5567 return true;
5569 cmp = compare_costs (a->cost, b->cost);
5570 if (cmp < 0)
5571 return true;
5573 if (cmp > 0)
5574 return false;
5576 /* In case the costs are the same, prefer the cheaper candidate. */
5577 if (a->cand->cost < b->cand->cost)
5578 return true;
5580 return false;
5584 /* Returns the candidate by which USE is expressed in IVS.  */
5586 static struct cost_pair *
5587 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5589 return ivs->cand_for_use[use->id];
5592 /* Computes the cost field of the IVS structure.  */
5594 static void
5595 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5597 comp_cost cost = ivs->cand_use_cost;
5599 cost.cost += ivs->cand_cost;
5601 cost.cost += ivopts_global_cost_for_size (data,
5602 ivs->n_regs + ivs->num_used_inv_expr);
5604 ivs->cost = cost;
5607 /* Remove invariants in set INVS from set IVS.  */
5609 static void
5610 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5612 bitmap_iterator bi;
5613 unsigned iid;
5615 if (!invs)
5616 return;
5618 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5620 ivs->n_invariant_uses[iid]--;
5621 if (ivs->n_invariant_uses[iid] == 0)
5622 ivs->n_regs--;
5626 /* Set USE not to be expressed by any candidate in IVS. */
5628 static void
5629 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5630 struct iv_use *use)
5632 unsigned uid = use->id, cid;
5633 struct cost_pair *cp;
5635 cp = ivs->cand_for_use[uid];
5636 if (!cp)
5637 return;
5638 cid = cp->cand->id;
5640 ivs->bad_uses++;
5641 ivs->cand_for_use[uid] = NULL;
5642 ivs->n_cand_uses[cid]--;
5644 if (ivs->n_cand_uses[cid] == 0)
5646 bitmap_clear_bit (ivs->cands, cid);
5647 /* Do not count the pseudocandidates. */
5648 if (cp->cand->iv)
5649 ivs->n_regs--;
5650 ivs->n_cands--;
5651 ivs->cand_cost -= cp->cand->cost;
5653 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5656 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5658 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5660 if (cp->inv_expr_id != -1)
5662 ivs->used_inv_expr[cp->inv_expr_id]--;
5663 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5664 ivs->num_used_inv_expr--;
5666 iv_ca_recount_cost (data, ivs);
5669 /* Add invariants in set INVS to set IVS. */
5671 static void
5672 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5674 bitmap_iterator bi;
5675 unsigned iid;
5677 if (!invs)
5678 return;
5680 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5682 ivs->n_invariant_uses[iid]++;
5683 if (ivs->n_invariant_uses[iid] == 1)
5684 ivs->n_regs++;
5688 /* Set cost pair for USE in set IVS to CP. */
5690 static void
5691 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5692 struct iv_use *use, struct cost_pair *cp)
5694 unsigned uid = use->id, cid;
5696 if (ivs->cand_for_use[uid] == cp)
5697 return;
5699 if (ivs->cand_for_use[uid])
5700 iv_ca_set_no_cp (data, ivs, use);
5702 if (cp)
5704 cid = cp->cand->id;
5706 ivs->bad_uses--;
5707 ivs->cand_for_use[uid] = cp;
5708 ivs->n_cand_uses[cid]++;
5709 if (ivs->n_cand_uses[cid] == 1)
5711 bitmap_set_bit (ivs->cands, cid);
5712 /* Do not count the pseudocandidates. */
5713 if (cp->cand->iv)
5714 ivs->n_regs++;
5715 ivs->n_cands++;
5716 ivs->cand_cost += cp->cand->cost;
5718 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5721 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5722 iv_ca_set_add_invariants (ivs, cp->depends_on);
5724 if (cp->inv_expr_id != -1)
5726 ivs->used_inv_expr[cp->inv_expr_id]++;
5727 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5728 ivs->num_used_inv_expr++;
5730 iv_ca_recount_cost (data, ivs);
5734 /* Extend set IVS by expressing USE by some of the candidates in it
5735 if possible. Consider all important candidates if candidates in
5736 set IVS don't give any result. */
5738 static void
5739 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5740 struct iv_use *use)
5742 struct cost_pair *best_cp = NULL, *cp;
5743 bitmap_iterator bi;
5744 unsigned i;
5745 struct iv_cand *cand;
5747 gcc_assert (ivs->upto >= use->id);
5748 ivs->upto++;
5749 ivs->bad_uses++;
5751 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5753 cand = iv_cand (data, i);
5754 cp = get_use_iv_cost (data, use, cand);
5755 if (cheaper_cost_pair (cp, best_cp))
5756 best_cp = cp;
5759 if (best_cp == NULL)
5761 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5763 cand = iv_cand (data, i);
5764 cp = get_use_iv_cost (data, use, cand);
5765 if (cheaper_cost_pair (cp, best_cp))
5766 best_cp = cp;
5770 iv_ca_set_cp (data, ivs, use, best_cp);
5773 /* Get cost for assignment IVS. */
5775 static comp_cost
5776 iv_ca_cost (struct iv_ca *ivs)
5778 /* This was a conditional expression but it triggered a bug in
5779 Sun C 5.5. */
5780 if (ivs->bad_uses)
5781 return infinite_cost;
5782 else
5783 return ivs->cost;
5786 /* Returns true if all dependences of CP are among invariants in IVS. */
5788 static bool
5789 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5791 unsigned i;
5792 bitmap_iterator bi;
5794 if (!cp->depends_on)
5795 return true;
5797 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5799 if (ivs->n_invariant_uses[i] == 0)
5800 return false;
5803 return true;
5806 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP,
5807 chained before NEXT_CHANGE.  */
5809 static struct iv_ca_delta *
5810 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5811 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5813 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5815 change->use = use;
5816 change->old_cp = old_cp;
5817 change->new_cp = new_cp;
5818 change->next_change = next_change;
5820 return change;
5823 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5824 are rewritten. */
5826 static struct iv_ca_delta *
5827 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5829 struct iv_ca_delta *last;
5831 if (!l2)
5832 return l1;
5834 if (!l1)
5835 return l2;
5837 for (last = l1; last->next_change; last = last->next_change)
5838 continue;
5839 last->next_change = l2;
5841 return l1;
5844 /* Reverse the list of changes DELTA, forming the inverse to it. */
5846 static struct iv_ca_delta *
5847 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5849 struct iv_ca_delta *act, *next, *prev = NULL;
5851 for (act = delta; act; act = next)
5853 next = act->next_change;
5854 act->next_change = prev;
5855 prev = act;
5857 std::swap (act->old_cp, act->new_cp);
5860 return prev;
5863 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5864 reverted instead. */
5866 static void
5867 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5868 struct iv_ca_delta *delta, bool forward)
5870 struct cost_pair *from, *to;
5871 struct iv_ca_delta *act;
5873 if (!forward)
5874 delta = iv_ca_delta_reverse (delta);
5876 for (act = delta; act; act = act->next_change)
5878 from = act->old_cp;
5879 to = act->new_cp;
5880 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5881 iv_ca_set_cp (data, ivs, act->use, to);
5884 if (!forward)
5885 iv_ca_delta_reverse (delta);
5888 /* Returns true if CAND is used in IVS. */
5890 static bool
5891 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5893 return ivs->n_cand_uses[cand->id] > 0;
5896 /* Returns number of induction variable candidates in the set IVS. */
5898 static unsigned
5899 iv_ca_n_cands (struct iv_ca *ivs)
5901 return ivs->n_cands;
5904 /* Free the list of changes DELTA. */
5906 static void
5907 iv_ca_delta_free (struct iv_ca_delta **delta)
5909 struct iv_ca_delta *act, *next;
5911 for (act = *delta; act; act = next)
5913 next = act->next_change;
5914 free (act);
5917 *delta = NULL;
5920 /* Allocates a new iv candidate assignment.  */
5922 static struct iv_ca *
5923 iv_ca_new (struct ivopts_data *data)
5925 struct iv_ca *nw = XNEW (struct iv_ca);
5927 nw->upto = 0;
5928 nw->bad_uses = 0;
5929 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5930 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5931 nw->cands = BITMAP_ALLOC (NULL);
5932 nw->n_cands = 0;
5933 nw->n_regs = 0;
5934 nw->cand_use_cost = no_cost;
5935 nw->cand_cost = 0;
5936 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5937 nw->cost = no_cost;
5938 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5939 nw->num_used_inv_expr = 0;
5941 return nw;
5944 /* Free memory occupied by the set IVS. */
5946 static void
5947 iv_ca_free (struct iv_ca **ivs)
5949 free ((*ivs)->cand_for_use);
5950 free ((*ivs)->n_cand_uses);
5951 BITMAP_FREE ((*ivs)->cands);
5952 free ((*ivs)->n_invariant_uses);
5953 free ((*ivs)->used_inv_expr);
5954 free (*ivs);
5955 *ivs = NULL;
5958 /* Dumps IVS to FILE. */
5960 static void
5961 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5963 const char *pref = " invariants ";
5964 unsigned i;
5965 comp_cost cost = iv_ca_cost (ivs);
5967 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5968 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5969 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5970 bitmap_print (file, ivs->cands, " candidates: ","\n");
5972 for (i = 0; i < ivs->upto; i++)
5974 struct iv_use *use = iv_use (data, i);
5975 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5976 if (cp)
5977 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5978 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5979 else
5980 fprintf (file, " use:%d --> ??\n", use->id);
5983 for (i = 1; i <= data->max_inv_id; i++)
5984 if (ivs->n_invariant_uses[i])
5986 fprintf (file, "%s%d", pref, i);
5987 pref = ", ";
5989 fprintf (file, "\n\n");
5992 /* Try changing the candidate in IVS to CAND for each use.  Return the
5993 cost of the new set, and store the differences in DELTA.  The number
5994 of induction variables in the new set is stored in N_IVS.  When the
5995 flag MIN_NCAND is true, try to find a solution with a minimal number of iv candidates.  */
5997 static comp_cost
5998 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5999 struct iv_cand *cand, struct iv_ca_delta **delta,
6000 unsigned *n_ivs, bool min_ncand)
6002 unsigned i;
6003 comp_cost cost;
6004 struct iv_use *use;
6005 struct cost_pair *old_cp, *new_cp;
6007 *delta = NULL;
6008 for (i = 0; i < ivs->upto; i++)
6010 use = iv_use (data, i);
6011 old_cp = iv_ca_cand_for_use (ivs, use);
6013 if (old_cp
6014 && old_cp->cand == cand)
6015 continue;
6017 new_cp = get_use_iv_cost (data, use, cand);
6018 if (!new_cp)
6019 continue;
6021 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6022 continue;
6024 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6025 continue;
6027 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6030 iv_ca_delta_commit (data, ivs, *delta, true);
6031 cost = iv_ca_cost (ivs);
6032 if (n_ivs)
6033 *n_ivs = iv_ca_n_cands (ivs);
6034 iv_ca_delta_commit (data, ivs, *delta, false);
6036 return cost;
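/* A typical caller (cf. try_add_cand_for below) uses the returned
   delta like

     acost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
     if (compare_costs (acost, best_cost) < 0)
       ... remember ACT_DELTA and commit it later ...
     else
       iv_ca_delta_free (&act_delta);

   IVS itself is unchanged on return: the delta is committed above only
   to read off the cost and is immediately reverted.  */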
6039 /* Try narrowing set IVS by removing CAND. Return the cost of
6040 the new set and store the differences in DELTA. START is
6041 the candidate with which we start narrowing. */
6043 static comp_cost
6044 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6045 struct iv_cand *cand, struct iv_cand *start,
6046 struct iv_ca_delta **delta)
6048 unsigned i, ci;
6049 struct iv_use *use;
6050 struct cost_pair *old_cp, *new_cp, *cp;
6051 bitmap_iterator bi;
6052 struct iv_cand *cnd;
6053 comp_cost cost, best_cost, acost;
6055 *delta = NULL;
6056 for (i = 0; i < n_iv_uses (data); i++)
6058 use = iv_use (data, i);
6060 old_cp = iv_ca_cand_for_use (ivs, use);
6061 if (old_cp->cand != cand)
6062 continue;
6064 best_cost = iv_ca_cost (ivs);
6065 /* Start narrowing with START. */
6066 new_cp = get_use_iv_cost (data, use, start);
6068 if (data->consider_all_candidates)
6070 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6072 if (ci == cand->id || (start && ci == start->id))
6073 continue;
6075 cnd = iv_cand (data, ci);
6077 cp = get_use_iv_cost (data, use, cnd);
6078 if (!cp)
6079 continue;
6081 iv_ca_set_cp (data, ivs, use, cp);
6082 acost = iv_ca_cost (ivs);
6084 if (compare_costs (acost, best_cost) < 0)
6086 best_cost = acost;
6087 new_cp = cp;
6091 else
6093 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6095 if (ci == cand->id || (start && ci == start->id))
6096 continue;
6098 cnd = iv_cand (data, ci);
6100 cp = get_use_iv_cost (data, use, cnd);
6101 if (!cp)
6102 continue;
6104 iv_ca_set_cp (data, ivs, use, cp);
6105 acost = iv_ca_cost (ivs);
6107 if (compare_costs (acost, best_cost) < 0)
6109 best_cost = acost;
6110 new_cp = cp;
6114 /* Restore the old cp for this use.  */
6115 iv_ca_set_cp (data, ivs, use, old_cp);
6117 if (!new_cp)
6119 iv_ca_delta_free (delta);
6120 return infinite_cost;
6123 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6126 iv_ca_delta_commit (data, ivs, *delta, true);
6127 cost = iv_ca_cost (ivs);
6128 iv_ca_delta_commit (data, ivs, *delta, false);
6130 return cost;
6133 /* Try optimizing the set of candidates IVS by removing candidates other
6134 than EXCEPT_CAND from it.  Return the cost of the new set, and store
6135 the differences in DELTA.  */
6137 static comp_cost
6138 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6139 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6141 bitmap_iterator bi;
6142 struct iv_ca_delta *act_delta, *best_delta;
6143 unsigned i;
6144 comp_cost best_cost, acost;
6145 struct iv_cand *cand;
6147 best_delta = NULL;
6148 best_cost = iv_ca_cost (ivs);
6150 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6152 cand = iv_cand (data, i);
6154 if (cand == except_cand)
6155 continue;
6157 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6159 if (compare_costs (acost, best_cost) < 0)
6161 best_cost = acost;
6162 iv_ca_delta_free (&best_delta);
6163 best_delta = act_delta;
6165 else
6166 iv_ca_delta_free (&act_delta);
6169 if (!best_delta)
6171 *delta = NULL;
6172 return best_cost;
6175 /* Recurse to possibly remove other unnecessary ivs. */
6176 iv_ca_delta_commit (data, ivs, best_delta, true);
6177 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6178 iv_ca_delta_commit (data, ivs, best_delta, false);
6179 *delta = iv_ca_delta_join (best_delta, *delta);
6180 return best_cost;
6183 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6184 cheaper local cost for USE than BEST_CP.  If so, return a pointer to
6185 the corresponding cost_pair; otherwise just return BEST_CP.  */
6187 static struct cost_pair*
6188 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6189 unsigned int cand_idx, struct iv_cand *old_cand,
6190 struct cost_pair *best_cp)
6192 struct iv_cand *cand;
6193 struct cost_pair *cp;
6195 gcc_assert (old_cand != NULL && best_cp != NULL);
6196 if (cand_idx == old_cand->id)
6197 return best_cp;
6199 cand = iv_cand (data, cand_idx);
6200 cp = get_use_iv_cost (data, use, cand);
6201 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6202 return cp;
6204 return best_cp;
6207 /* Try breaking a locally optimal fixed point for IVS by replacing
6208 candidates which are used by more than one iv use.  For each such
6209 candidate, this function tries to represent the iv uses under that
6210 candidate using other candidates with lower local cost, then tries to
6211 prune the new set.  If the new set has lower cost, it returns the new
6212 cost after recording the candidate replacement in the list DELTA.  */
6214 static comp_cost
6215 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6216 struct iv_ca_delta **delta)
6218 bitmap_iterator bi, bj;
6219 unsigned int i, j, k;
6220 struct iv_use *use;
6221 struct iv_cand *cand;
6222 comp_cost orig_cost, acost;
6223 struct iv_ca_delta *act_delta, *tmp_delta;
6224 struct cost_pair *old_cp, *best_cp = NULL;
6226 *delta = NULL;
6227 orig_cost = iv_ca_cost (ivs);
6229 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6231 if (ivs->n_cand_uses[i] == 1
6232 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6233 continue;
6235 cand = iv_cand (data, i);
6237 act_delta = NULL;
6238 /* Represent uses under current candidate using other ones with
6239 lower local cost. */
6240 for (j = 0; j < ivs->upto; j++)
6242 use = iv_use (data, j);
6243 old_cp = iv_ca_cand_for_use (ivs, use);
6245 if (old_cp->cand != cand)
6246 continue;
6248 best_cp = old_cp;
6249 if (data->consider_all_candidates)
6250 for (k = 0; k < n_iv_cands (data); k++)
6251 best_cp = cheaper_cost_with_cand (data, use, k,
6252 old_cp->cand, best_cp);
6253 else
6254 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6255 best_cp = cheaper_cost_with_cand (data, use, k,
6256 old_cp->cand, best_cp);
6258 if (best_cp == old_cp)
6259 continue;
6261 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
/* No need for further pruning. */
6264 if (!act_delta)
6265 continue;
6267 /* Prune the new candidate set. */
6268 iv_ca_delta_commit (data, ivs, act_delta, true);
6269 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6270 iv_ca_delta_commit (data, ivs, act_delta, false);
6271 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6273 if (compare_costs (acost, orig_cost) < 0)
6275 *delta = act_delta;
6276 return acost;
6278 else
6279 iv_ca_delta_free (&act_delta);
6282 return orig_cost;
/* Tries to extend the set IVS in the best possible way in order
   to express USE. If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object. */
6290 static bool
6291 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6292 struct iv_use *use, bool originalp)
6294 comp_cost best_cost, act_cost;
6295 unsigned i;
6296 bitmap_iterator bi;
6297 struct iv_cand *cand;
6298 struct iv_ca_delta *best_delta = NULL, *act_delta;
6299 struct cost_pair *cp;
6301 iv_ca_add_use (data, ivs, use);
6302 best_cost = iv_ca_cost (ivs);
6303 cp = iv_ca_cand_for_use (ivs, use);
6304 if (cp)
6306 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6307 iv_ca_set_no_cp (data, ivs, use);
/* If ORIGINALP is true, try to find the original IV for the use. Otherwise
   first try important candidates not based on any memory object; only if
   this fails, try the specific ones. Rationale -- in loops with many
   variables the best choice is often to use just one generic biv. If we
   added many ivs specific to the uses here, the optimization algorithm
   would later be likely to get stuck in a local minimum, causing us to
   create too many ivs. Going from few ivs to more seems more likely to
   be successful -- starting from few ivs, replacing an expensive use by
   a specific iv should always be a win. */
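/* For instance, in a hypothetical source loop

     for (i = 0; i < n; i++)
       {
         a[i] = 0;
         b[i] = 0;
       }

   the two address uses and the exit test can all be expressed from the
   single generic biv i, whereas seeding the set with a separate pointer
   iv per array could trap the search in a worse local minimum. */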
6319 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6321 cand = iv_cand (data, i);
if (originalp && cand->pos != IP_ORIGINAL)
6324 continue;
6326 if (!originalp && cand->iv->base_object != NULL_TREE)
6327 continue;
6329 if (iv_ca_cand_used_p (ivs, cand))
6330 continue;
6332 cp = get_use_iv_cost (data, use, cand);
6333 if (!cp)
6334 continue;
6336 iv_ca_set_cp (data, ivs, use, cp);
6337 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6338 true);
6339 iv_ca_set_no_cp (data, ivs, use);
6340 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6342 if (compare_costs (act_cost, best_cost) < 0)
6344 best_cost = act_cost;
6346 iv_ca_delta_free (&best_delta);
6347 best_delta = act_delta;
6349 else
6350 iv_ca_delta_free (&act_delta);
6353 if (infinite_cost_p (best_cost))
6355 for (i = 0; i < use->n_map_members; i++)
6357 cp = use->cost_map + i;
6358 cand = cp->cand;
6359 if (!cand)
6360 continue;
/* Already tried this. */
if (cand->important)
  {
    if (originalp && cand->pos == IP_ORIGINAL)
      continue;
    if (!originalp && cand->iv->base_object == NULL_TREE)
      continue;
  }
6371 if (iv_ca_cand_used_p (ivs, cand))
6372 continue;
6374 act_delta = NULL;
6375 iv_ca_set_cp (data, ivs, use, cp);
6376 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6377 iv_ca_set_no_cp (data, ivs, use);
6378 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6379 cp, act_delta);
6381 if (compare_costs (act_cost, best_cost) < 0)
6383 best_cost = act_cost;
6385 if (best_delta)
6386 iv_ca_delta_free (&best_delta);
6387 best_delta = act_delta;
6389 else
6390 iv_ca_delta_free (&act_delta);
6394 iv_ca_delta_commit (data, ivs, best_delta, true);
6395 iv_ca_delta_free (&best_delta);
6397 return !infinite_cost_p (best_cost);
6400 /* Finds an initial assignment of candidates to uses. */
6402 static struct iv_ca *
6403 get_initial_solution (struct ivopts_data *data, bool originalp)
6405 struct iv_ca *ivs = iv_ca_new (data);
6406 unsigned i;
6408 for (i = 0; i < n_iv_uses (data); i++)
6409 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6411 iv_ca_free (&ivs);
6412 return NULL;
6415 return ivs;
/* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a locally optimal fixed-point by replacing candidates in IVS. */
6422 static bool
6423 try_improve_iv_set (struct ivopts_data *data,
6424 struct iv_ca *ivs, bool *try_replace_p)
6426 unsigned i, n_ivs;
6427 comp_cost acost, best_cost = iv_ca_cost (ivs);
6428 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6429 struct iv_cand *cand;
6431 /* Try extending the set of induction variables by one. */
6432 for (i = 0; i < n_iv_cands (data); i++)
6434 cand = iv_cand (data, i);
6436 if (iv_ca_cand_used_p (ivs, cand))
6437 continue;
6439 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6440 if (!act_delta)
6441 continue;
6443 /* If we successfully added the candidate and the set is small enough,
6444 try optimizing it by removing other candidates. */
6445 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6447 iv_ca_delta_commit (data, ivs, act_delta, true);
6448 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6449 iv_ca_delta_commit (data, ivs, act_delta, false);
6450 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6453 if (compare_costs (acost, best_cost) < 0)
6455 best_cost = acost;
6456 iv_ca_delta_free (&best_delta);
6457 best_delta = act_delta;
6459 else
6460 iv_ca_delta_free (&act_delta);
6463 if (!best_delta)
6465 /* Try removing the candidates from the set instead. */
6466 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6468 if (!best_delta && *try_replace_p)
6470 *try_replace_p = false;
/* So far the candidate selection algorithm tends to choose few IVs,
   which handles the cases in which a loop has many variables
   but the best choice is to use only one general biv. One
   weakness is that it can't handle the opposite case, in which
   different candidates should be chosen for different uses. To
   solve this, we replace candidates in the manner described in the
   comment above iv_ca_replace, giving the general algorithm a chance
   to break the locally optimal fixed-point in these cases. */
6479 best_cost = iv_ca_replace (data, ivs, &best_delta);
6482 if (!best_delta)
6483 return false;
6486 iv_ca_delta_commit (data, ivs, best_delta, true);
6487 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6488 iv_ca_delta_free (&best_delta);
6489 return true;
/* Attempts to find the optimal set of induction variables. We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost. */
6496 static struct iv_ca *
6497 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6499 struct iv_ca *set;
6500 bool try_replace_p = true;
6502 /* Get the initial solution. */
6503 set = get_initial_solution (data, originalp);
6504 if (!set)
6506 if (dump_file && (dump_flags & TDF_DETAILS))
6507 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6508 return NULL;
6511 if (dump_file && (dump_flags & TDF_DETAILS))
6513 fprintf (dump_file, "Initial set of candidates:\n");
6514 iv_ca_dump (data, dump_file, set);
6517 while (try_improve_iv_set (data, set, &try_replace_p))
6519 if (dump_file && (dump_flags & TDF_DETAILS))
6521 fprintf (dump_file, "Improved to:\n");
6522 iv_ca_dump (data, dump_file, set);
6526 return set;
6529 static struct iv_ca *
6530 find_optimal_iv_set (struct ivopts_data *data)
6532 unsigned i;
6533 struct iv_ca *set, *origset;
6534 struct iv_use *use;
6535 comp_cost cost, origcost;
/* Determine the cost based on a strategy that starts with the original IVs,
   then try again using a strategy that prefers candidates not based
   on any IVs. */
6540 origset = find_optimal_iv_set_1 (data, true);
6541 set = find_optimal_iv_set_1 (data, false);
6543 if (!origset && !set)
6544 return NULL;
6546 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6547 cost = set ? iv_ca_cost (set) : infinite_cost;
6549 if (dump_file && (dump_flags & TDF_DETAILS))
6551 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6552 origcost.cost, origcost.complexity);
6553 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6554 cost.cost, cost.complexity);
6557 /* Choose the one with the best cost. */
6558 if (compare_costs (origcost, cost) <= 0)
6560 if (set)
6561 iv_ca_free (&set);
6562 set = origset;
6564 else if (origset)
6565 iv_ca_free (&origset);
6567 for (i = 0; i < n_iv_uses (data); i++)
6569 use = iv_use (data, i);
6570 use->selected = iv_ca_cand_for_use (set, use)->cand;
6573 return set;
6576 /* Creates a new induction variable corresponding to CAND. */
6578 static void
6579 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6581 gimple_stmt_iterator incr_pos;
6582 tree base;
6583 bool after = false;
6585 if (!cand->iv)
6586 return;
6588 switch (cand->pos)
6590 case IP_NORMAL:
6591 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6592 break;
6594 case IP_END:
6595 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6596 after = true;
6597 break;
6599 case IP_AFTER_USE:
6600 after = true;
6601 /* fall through */
6602 case IP_BEFORE_USE:
6603 incr_pos = gsi_for_stmt (cand->incremented_at);
6604 break;
6606 case IP_ORIGINAL:
6607 /* Mark that the iv is preserved. */
6608 name_info (data, cand->var_before)->preserve_biv = true;
6609 name_info (data, cand->var_after)->preserve_biv = true;
6611 /* Rewrite the increment so that it uses var_before directly. */
6612 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6613 return;
6616 gimple_add_tmp_var (cand->var_before);
6618 base = unshare_expr (cand->iv->base);
6620 create_iv (base, unshare_expr (cand->iv->step),
6621 cand->var_before, data->current_loop,
6622 &incr_pos, after, &cand->var_before, &cand->var_after);
6625 /* Creates new induction variables described in SET. */
6627 static void
6628 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6630 unsigned i;
6631 struct iv_cand *cand;
6632 bitmap_iterator bi;
6634 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6636 cand = iv_cand (data, i);
6637 create_new_iv (data, cand);
6640 if (dump_file && (dump_flags & TDF_DETAILS))
6642 fprintf (dump_file, "Selected IV set for loop %d",
6643 data->current_loop->num);
6644 if (data->loop_loc != UNKNOWN_LOCATION)
6645 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6646 LOCATION_LINE (data->loop_loc));
6647 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6648 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6650 cand = iv_cand (data, i);
6651 dump_cand (dump_file, cand);
6653 fprintf (dump_file, "\n");
6657 /* Rewrites USE (definition of iv used in a nonlinear expression)
6658 using candidate CAND. */
6660 static void
6661 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6662 struct iv_use *use, struct iv_cand *cand)
6664 tree comp;
6665 tree op, tgt;
6666 gassign *ass;
6667 gimple_stmt_iterator bsi;
/* An important special case -- if we are asked to express the value of
   the original iv by itself, just exit; there is no need to
   introduce a new computation (that might also need casting the
   variable to unsigned and back). */
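/* E.g. (hypothetical GIMPLE), if the use is the original increment

     i_14 = i_8 + 1;

   and CAND is the original iv incremented at this statement, the
   statement already computes the candidate's value and is kept as is,
   provided the other operand is loop-invariant (checked below). */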
6673 if (cand->pos == IP_ORIGINAL
6674 && cand->incremented_at == use->stmt)
6676 enum tree_code stmt_code;
6678 gcc_assert (is_gimple_assign (use->stmt));
6679 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6681 /* Check whether we may leave the computation unchanged.
6682 This is the case only if it does not rely on other
6683 computations in the loop -- otherwise, the computation
6684 we rely upon may be removed in remove_unused_ivs,
thus leading to an ICE. */
6686 stmt_code = gimple_assign_rhs_code (use->stmt);
6687 if (stmt_code == PLUS_EXPR
6688 || stmt_code == MINUS_EXPR
6689 || stmt_code == POINTER_PLUS_EXPR)
6691 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6692 op = gimple_assign_rhs2 (use->stmt);
6693 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6694 op = gimple_assign_rhs1 (use->stmt);
6695 else
6696 op = NULL_TREE;
6698 else
6699 op = NULL_TREE;
6701 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6702 return;
6705 comp = get_computation (data->current_loop, use, cand);
6706 gcc_assert (comp != NULL_TREE);
6708 switch (gimple_code (use->stmt))
6710 case GIMPLE_PHI:
6711 tgt = PHI_RESULT (use->stmt);
6713 /* If we should keep the biv, do not replace it. */
6714 if (name_info (data, tgt)->preserve_biv)
6715 return;
6717 bsi = gsi_after_labels (gimple_bb (use->stmt));
6718 break;
6720 case GIMPLE_ASSIGN:
6721 tgt = gimple_assign_lhs (use->stmt);
6722 bsi = gsi_for_stmt (use->stmt);
6723 break;
6725 default:
6726 gcc_unreachable ();
6729 if (!valid_gimple_rhs_p (comp)
6730 || (gimple_code (use->stmt) != GIMPLE_PHI
/* We can't allow re-allocating the stmt as it might still be
   pointed to. */
6733 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6734 >= gimple_num_ops (gsi_stmt (bsi)))))
6736 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6737 true, GSI_SAME_STMT);
6738 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6740 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
/* As this isn't a plain copy, we have to reset alignment
   information. */
6743 if (SSA_NAME_PTR_INFO (comp))
6744 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6748 if (gimple_code (use->stmt) == GIMPLE_PHI)
6750 ass = gimple_build_assign (tgt, comp);
6751 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6753 bsi = gsi_for_stmt (use->stmt);
6754 remove_phi_node (&bsi, false);
6756 else
6758 gimple_assign_set_rhs_from_tree (&bsi, comp);
6759 use->stmt = gsi_stmt (bsi);
6763 /* Performs a peephole optimization to reorder the iv update statement with
6764 a mem ref to enable instruction combining in later phases. The mem ref uses
6765 the iv value before the update, so the reordering transformation requires
6766 adjustment of the offset. CAND is the selected IV_CAND.
6768 Example:
6770 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6771 iv2 = iv1 + 1;
6773 if (t < val) (1)
6774 goto L;
6775 goto Head;
Directly propagating t over to (1) would introduce an overlapping live
range and thus increase register pressure. This peephole transforms it into:
6782 iv2 = iv1 + 1;
6783 t = MEM_REF (base, iv2, 8, 8);
6784 if (t < val)
6785 goto L;
6786 goto Head;
6789 static void
6790 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6792 tree var_after;
6793 gimple iv_update, stmt;
6794 basic_block bb;
6795 gimple_stmt_iterator gsi, gsi_iv;
6797 if (cand->pos != IP_NORMAL)
6798 return;
6800 var_after = cand->var_after;
6801 iv_update = SSA_NAME_DEF_STMT (var_after);
6803 bb = gimple_bb (iv_update);
6804 gsi = gsi_last_nondebug_bb (bb);
6805 stmt = gsi_stmt (gsi);
/* Only handle conditional statements for now. */
6808 if (gimple_code (stmt) != GIMPLE_COND)
6809 return;
6811 gsi_prev_nondebug (&gsi);
6812 stmt = gsi_stmt (gsi);
6813 if (stmt != iv_update)
6814 return;
6816 gsi_prev_nondebug (&gsi);
6817 if (gsi_end_p (gsi))
6818 return;
6820 stmt = gsi_stmt (gsi);
6821 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6822 return;
6824 if (stmt != use->stmt)
6825 return;
6827 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6828 return;
6830 if (dump_file && (dump_flags & TDF_DETAILS))
6832 fprintf (dump_file, "Reordering \n");
6833 print_gimple_stmt (dump_file, iv_update, 0, 0);
6834 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6835 fprintf (dump_file, "\n");
6838 gsi = gsi_for_stmt (use->stmt);
6839 gsi_iv = gsi_for_stmt (iv_update);
6840 gsi_move_before (&gsi_iv, &gsi);
6842 cand->pos = IP_BEFORE_USE;
6843 cand->incremented_at = use->stmt;
6846 /* Rewrites USE (address that is an iv) using candidate CAND. */
6848 static void
6849 rewrite_use_address_1 (struct ivopts_data *data,
6850 struct iv_use *use, struct iv_cand *cand)
6852 aff_tree aff;
6853 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6854 tree base_hint = NULL_TREE;
6855 tree ref, iv;
6856 bool ok;
6858 adjust_iv_update_pos (cand, use);
6859 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6860 gcc_assert (ok);
6861 unshare_aff_combination (&aff);
6863 /* To avoid undefined overflow problems, all IV candidates use unsigned
6864 integer types. The drawback is that this makes it impossible for
6865 create_mem_ref to distinguish an IV that is based on a memory object
6866 from one that represents simply an offset.
6868 To work around this problem, we pass a hint to create_mem_ref that
6869 indicates which variable (if any) in aff is an IV based on a memory
6870 object. Note that we only consider the candidate. If this is not
6871 based on an object, the base of the reference is in some subexpression
6872 of the use -- but these will use pointer types, so they are recognized
6873 by the create_mem_ref heuristics anyway. */
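/* As a hypothetical illustration: for a use *p_5 whose candidate is
   based on the memory object of p, AFF may look like
   "(unsigned long) p_4 + 4"; passing the candidate's variable as the
   hint lets create_mem_ref build MEM[p_4 + 4] rather than treating the
   whole combination as a plain integer offset. */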
6874 if (cand->iv->base_object)
6875 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6877 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6878 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6879 reference_alias_ptr_type (*use->op_p),
6880 iv, base_hint, data->speed);
6881 copy_ref_info (ref, *use->op_p);
6882 *use->op_p = ref;
6885 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6886 first use of a group, rewrites sub uses in the group too. */
6888 static void
6889 rewrite_use_address (struct ivopts_data *data,
6890 struct iv_use *use, struct iv_cand *cand)
6892 struct iv_use *next;
6894 gcc_assert (use->sub_id == 0);
6895 rewrite_use_address_1 (data, use, cand);
6896 update_stmt (use->stmt);
6898 for (next = use->next; next != NULL; next = next->next)
6900 rewrite_use_address_1 (data, next, cand);
6901 update_stmt (next->stmt);
6904 return;
6907 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6908 candidate CAND. */
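/* A hypothetical example of such a rewrite: the exit test

     if (i_4 < n_6)

   may become

     if (ivtmp_23 != _25)

   where _25 is the bound computed from CAND in the loop preheader. */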
6910 static void
6911 rewrite_use_compare (struct ivopts_data *data,
6912 struct iv_use *use, struct iv_cand *cand)
6914 tree comp, *var_p, op, bound;
6915 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6916 enum tree_code compare;
6917 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6918 bool ok;
6920 bound = cp->value;
6921 if (bound)
6923 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6924 tree var_type = TREE_TYPE (var);
6925 gimple_seq stmts;
6927 if (dump_file && (dump_flags & TDF_DETAILS))
6929 fprintf (dump_file, "Replacing exit test: ");
6930 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6932 compare = cp->comp;
6933 bound = unshare_expr (fold_convert (var_type, bound));
6934 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6935 if (stmts)
6936 gsi_insert_seq_on_edge_immediate (
6937 loop_preheader_edge (data->current_loop),
6938 stmts);
6940 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6941 gimple_cond_set_lhs (cond_stmt, var);
6942 gimple_cond_set_code (cond_stmt, compare);
6943 gimple_cond_set_rhs (cond_stmt, op);
6944 return;
6947 /* The induction variable elimination failed; just express the original
6948 giv. */
6949 comp = get_computation (data->current_loop, use, cand);
6950 gcc_assert (comp != NULL_TREE);
6952 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6953 gcc_assert (ok);
6955 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6956 true, GSI_SAME_STMT);
6959 /* Rewrites USE using candidate CAND. */
6961 static void
6962 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6964 switch (use->type)
6966 case USE_NONLINEAR_EXPR:
6967 rewrite_use_nonlinear_expr (data, use, cand);
6968 break;
6970 case USE_ADDRESS:
6971 rewrite_use_address (data, use, cand);
6972 break;
6974 case USE_COMPARE:
6975 rewrite_use_compare (data, use, cand);
6976 break;
6978 default:
6979 gcc_unreachable ();
6982 update_stmt (use->stmt);
6985 /* Rewrite the uses using the selected induction variables. */
6987 static void
6988 rewrite_uses (struct ivopts_data *data)
6990 unsigned i;
6991 struct iv_cand *cand;
6992 struct iv_use *use;
6994 for (i = 0; i < n_iv_uses (data); i++)
6996 use = iv_use (data, i);
6997 cand = use->selected;
6998 gcc_assert (cand);
7000 rewrite_use (data, use, cand);
7004 /* Removes the ivs that are not used after rewriting. */
7006 static void
7007 remove_unused_ivs (struct ivopts_data *data)
7009 unsigned j;
7010 bitmap_iterator bi;
7011 bitmap toremove = BITMAP_ALLOC (NULL);
7013 /* Figure out an order in which to release SSA DEFs so that we don't
7014 release something that we'd have to propagate into a debug stmt
7015 afterwards. */
7016 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7018 struct version_info *info;
7020 info = ver_info (data, j);
7021 if (info->iv
7022 && !integer_zerop (info->iv->step)
7023 && !info->inv_id
7024 && !info->iv->have_use_for
7025 && !info->preserve_biv)
7027 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7029 tree def = info->iv->ssa_name;
7031 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7033 imm_use_iterator imm_iter;
7034 use_operand_p use_p;
7035 gimple stmt;
7036 int count = 0;
7038 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7040 if (!gimple_debug_bind_p (stmt))
7041 continue;
7043 /* We just want to determine whether to do nothing
7044 (count == 0), to substitute the computed
7045 expression into a single use of the SSA DEF by
7046 itself (count == 1), or to use a debug temp
7047 because the SSA DEF is used multiple times or as
7048 part of a larger expression (count > 1). */
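/* For instance (hypothetical debug binds):
     # DEBUG x => i_7       gives count == 1,
     # DEBUG x => i_7 + 1   gives count == 2;
   a bind of the bare DEF counts once, while a DEF nested in a
   larger value counts twice and forces the debug temp below. */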
7049 count++;
7050 if (gimple_debug_bind_get_value (stmt) != def)
7051 count++;
7053 if (count > 1)
7054 BREAK_FROM_IMM_USE_STMT (imm_iter);
7057 if (!count)
7058 continue;
7060 struct iv_use dummy_use;
7061 struct iv_cand *best_cand = NULL, *cand;
7062 unsigned i, best_pref = 0, cand_pref;
7064 memset (&dummy_use, 0, sizeof (dummy_use));
7065 dummy_use.iv = info->iv;
7066 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7068 cand = iv_use (data, i)->selected;
7069 if (cand == best_cand)
7070 continue;
7071 cand_pref = operand_equal_p (cand->iv->step,
7072 info->iv->step, 0)
7073 ? 4 : 0;
7074 cand_pref
7075 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7076 == TYPE_MODE (TREE_TYPE (info->iv->base))
7077 ? 2 : 0;
7078 cand_pref
7079 += TREE_CODE (cand->iv->base) == INTEGER_CST
7080 ? 1 : 0;
7081 if (best_cand == NULL || best_pref < cand_pref)
7083 best_cand = cand;
7084 best_pref = cand_pref;
7088 if (!best_cand)
7089 continue;
7091 tree comp = get_computation_at (data->current_loop,
7092 &dummy_use, best_cand,
7093 SSA_NAME_DEF_STMT (def));
7094 if (!comp)
7095 continue;
7097 if (count > 1)
7099 tree vexpr = make_node (DEBUG_EXPR_DECL);
7100 DECL_ARTIFICIAL (vexpr) = 1;
7101 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7102 if (SSA_NAME_VAR (def))
7103 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7104 else
7105 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7106 gdebug *def_temp
7107 = gimple_build_debug_bind (vexpr, comp, NULL);
7108 gimple_stmt_iterator gsi;
7110 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7111 gsi = gsi_after_labels (gimple_bb
7112 (SSA_NAME_DEF_STMT (def)));
7113 else
7114 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7116 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7117 comp = vexpr;
7120 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7122 if (!gimple_debug_bind_p (stmt))
7123 continue;
7125 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7126 SET_USE (use_p, comp);
7128 update_stmt (stmt);
7134 release_defs_bitset (toremove);
7136 BITMAP_FREE (toremove);
7139 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7140 for hash_map::traverse. */
7142 bool
7143 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7145 free (value);
7146 return true;
7149 /* Frees data allocated by the optimization of a single loop. */
7151 static void
7152 free_loop_data (struct ivopts_data *data)
7154 unsigned i, j;
7155 bitmap_iterator bi;
7156 tree obj;
7158 if (data->niters)
7160 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7161 delete data->niters;
7162 data->niters = NULL;
7165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7167 struct version_info *info;
7169 info = ver_info (data, i);
7170 info->iv = NULL;
7171 info->has_nonlin_use = false;
7172 info->preserve_biv = false;
7173 info->inv_id = 0;
7175 bitmap_clear (data->relevant);
7176 bitmap_clear (data->important_candidates);
7178 for (i = 0; i < n_iv_uses (data); i++)
7180 struct iv_use *use = iv_use (data, i);
7181 struct iv_use *pre = use, *sub = use->next;
7183 while (sub)
7185 gcc_assert (sub->related_cands == NULL);
7186 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7188 pre = sub;
7189 sub = sub->next;
7190 free (pre);
7193 BITMAP_FREE (use->related_cands);
7194 for (j = 0; j < use->n_map_members; j++)
7195 if (use->cost_map[j].depends_on)
7196 BITMAP_FREE (use->cost_map[j].depends_on);
7197 free (use->cost_map);
7198 free (use);
7200 data->iv_uses.truncate (0);
7202 for (i = 0; i < n_iv_cands (data); i++)
7204 struct iv_cand *cand = iv_cand (data, i);
7206 if (cand->depends_on)
7207 BITMAP_FREE (cand->depends_on);
7208 free (cand);
7210 data->iv_candidates.truncate (0);
7212 if (data->version_info_size < num_ssa_names)
7214 data->version_info_size = 2 * num_ssa_names;
7215 free (data->version_info);
7216 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7219 data->max_inv_id = 0;
7221 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7222 SET_DECL_RTL (obj, NULL_RTX);
7224 decl_rtl_to_reset.truncate (0);
7226 data->inv_expr_tab->empty ();
7227 data->inv_expr_id = 0;
/* Finalizes the data structures used by the iv optimization
   pass. */
7233 static void
7234 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7236 free_loop_data (data);
7237 free (data->version_info);
7238 BITMAP_FREE (data->relevant);
7239 BITMAP_FREE (data->important_candidates);
7241 decl_rtl_to_reset.release ();
7242 data->iv_uses.release ();
7243 data->iv_candidates.release ();
7244 delete data->inv_expr_tab;
7245 data->inv_expr_tab = NULL;
7246 free_affine_expand_cache (&data->name_expansion_cache);
7247 obstack_free (&data->iv_obstack, NULL);
7250 /* Returns true if the loop body BODY includes any function calls. */
7252 static bool
7253 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7255 gimple_stmt_iterator gsi;
7256 unsigned i;
7258 for (i = 0; i < num_nodes; i++)
7259 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7261 gimple stmt = gsi_stmt (gsi);
7262 if (is_gimple_call (stmt)
7263 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7264 return true;
7266 return false;
7269 /* Optimizes the LOOP. Returns true if anything changed. */
7271 static bool
7272 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7274 bool changed = false;
7275 struct iv_ca *iv_ca;
7276 edge exit = single_dom_exit (loop);
7277 basic_block *body;
7279 gcc_assert (!data->niters);
7280 data->current_loop = loop;
7281 data->loop_loc = find_loop_location (loop);
7282 data->speed = optimize_loop_for_speed_p (loop);
7284 if (dump_file && (dump_flags & TDF_DETAILS))
7286 fprintf (dump_file, "Processing loop %d", loop->num);
7287 if (data->loop_loc != UNKNOWN_LOCATION)
7288 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7289 LOCATION_LINE (data->loop_loc));
7290 fprintf (dump_file, "\n");
7292 if (exit)
7294 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7295 exit->src->index, exit->dest->index);
7296 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7297 fprintf (dump_file, "\n");
7300 fprintf (dump_file, "\n");
7303 body = get_loop_body (loop);
7304 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7305 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7306 free (body);
7308 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7310 /* For each ssa name determines whether it behaves as an induction variable
7311 in some loop. */
7312 if (!find_induction_variables (data))
7313 goto finish;
7315 /* Finds interesting uses (item 1). */
7316 find_interesting_uses (data);
7317 group_address_uses (data);
7318 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7319 goto finish;
7321 /* Finds candidates for the induction variables (item 2). */
7322 find_iv_candidates (data);
7324 /* Calculates the costs (item 3, part 1). */
7325 determine_iv_costs (data);
7326 determine_use_iv_costs (data);
7327 determine_set_costs (data);
7329 /* Find the optimal set of induction variables (item 3, part 2). */
7330 iv_ca = find_optimal_iv_set (data);
7331 if (!iv_ca)
7332 goto finish;
7333 changed = true;
7335 /* Create the new induction variables (item 4, part 1). */
7336 create_new_ivs (data, iv_ca);
7337 iv_ca_free (&iv_ca);
7339 /* Rewrite the uses (item 4, part 2). */
7340 rewrite_uses (data);
7342 /* Remove the ivs that are unused after rewriting. */
7343 remove_unused_ivs (data);
7345 /* We have changed the structure of induction variables; it might happen
7346 that definitions in the scev database refer to some of them that were
7347 eliminated. */
7348 scev_reset ();
7350 finish:
7351 free_loop_data (data);
7353 return changed;
7356 /* Main entry point. Optimizes induction variables in loops. */
7358 void
7359 tree_ssa_iv_optimize (void)
7361 struct loop *loop;
7362 struct ivopts_data data;
7364 tree_ssa_iv_optimize_init (&data);
7366 /* Optimize the loops starting with the innermost ones. */
7367 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7369 if (dump_file && (dump_flags & TDF_DETAILS))
7370 flow_loop_dump (loop, dump_file, NULL, 1);
7372 tree_ssa_iv_optimize_loop (&data, loop);
7375 tree_ssa_iv_optimize_finalize (&data);