* include/bits/basic_string.h (getline): Qualify call to prevent ADL
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob98b60abdcb526c11ac8877b8c8e34340769257b6
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "hash-map.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
117 #include "expr.h"
118 #include "recog.h"
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
136 return niter;
139 /* Representation of the induction variable. */
140 struct iv
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to that the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 bool biv_p; /* Is it a biv? */
147 bool have_use_for; /* Do we already have a use for it? */
148 unsigned use_id; /* The identifier in the use if it is the case. */
151 /* Per-ssa version information (induction variable descriptions, etc.). */
152 struct version_info
154 tree name; /* The ssa name. */
155 struct iv *iv; /* Induction variable description. */
156 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
158 bool preserve_biv; /* For the original biv, whether to preserve it. */
159 unsigned inv_id; /* Id of an invariant. */
162 /* Types of uses. */
163 enum use_type
165 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
166 USE_ADDRESS, /* Use in an address. */
167 USE_COMPARE /* Use is a compare. */
170 /* Cost of a computation. */
171 typedef struct
173 int cost; /* The runtime cost. */
174 unsigned complexity; /* The estimate of the complexity of the code for
175 the computation (in no concrete units --
176 complexity field should be larger for more
177 complex expressions and addressing modes). */
178 } comp_cost;
180 static const comp_cost no_cost = {0, 0};
181 static const comp_cost infinite_cost = {INFTY, INFTY};
183 /* The candidate - cost pair. */
184 struct cost_pair
186 struct iv_cand *cand; /* The candidate. */
187 comp_cost cost; /* The cost. */
188 bitmap depends_on; /* The list of invariants that have to be
189 preserved. */
190 tree value; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
193 enum tree_code comp; /* For iv elimination, the comparison. */
194 int inv_expr_id; /* Loop invariant expression id. */
197 /* Use. */
198 struct iv_use
200 unsigned id; /* The id of the use. */
201 enum use_type type; /* Type of the use. */
202 struct iv *iv; /* The induction variable it is based on. */
203 gimple stmt; /* Statement in that it occurs. */
204 tree *op_p; /* The place where it occurs. */
205 bitmap related_cands; /* The set of "related" iv candidates, plus the common
206 important ones. */
208 unsigned n_map_members; /* Number of candidates in the cost_map list. */
209 struct cost_pair *cost_map;
210 /* The costs wrto the iv candidates. */
212 struct iv_cand *selected;
213 /* The selected candidate. */
216 /* The position where the iv is computed. */
217 enum iv_position
219 IP_NORMAL, /* At the end, just before the exit condition. */
220 IP_END, /* At the end of the latch block. */
221 IP_BEFORE_USE, /* Immediately before a specific use. */
222 IP_AFTER_USE, /* Immediately after a specific use. */
223 IP_ORIGINAL /* The original biv. */
226 /* The induction variable candidate. */
227 struct iv_cand
229 unsigned id; /* The number of the candidate. */
230 bool important; /* Whether this is an "important" candidate, i.e. such
231 that it should be considered by all uses. */
232 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
233 gimple incremented_at;/* For original biv, the statement where it is
234 incremented. */
235 tree var_before; /* The variable used for it before increment. */
236 tree var_after; /* The variable used for it after increment. */
237 struct iv *iv; /* The value of the candidate. NULL for
238 "pseudocandidate" used to indicate the possibility
239 to replace the final value of an iv by direct
240 computation of the value. */
241 unsigned cost; /* Cost of the candidate. */
242 unsigned cost_step; /* Cost of the candidate's increment operation. */
243 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
244 where it is incremented. */
245 bitmap depends_on; /* The list of invariants that are used in step of the
246 biv. */
249 /* Loop invariant expression hashtable entry. */
250 struct iv_inv_expr_ent
252 tree expr;
253 int id;
254 hashval_t hash;
257 /* The data used by the induction variable optimizations. */
259 typedef struct iv_use *iv_use_p;
261 typedef struct iv_cand *iv_cand_p;
263 /* Hashtable helpers. */
265 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
267 typedef iv_inv_expr_ent value_type;
268 typedef iv_inv_expr_ent compare_type;
269 static inline hashval_t hash (const value_type *);
270 static inline bool equal (const value_type *, const compare_type *);
273 /* Hash function for loop invariant expressions. */
275 inline hashval_t
276 iv_inv_expr_hasher::hash (const value_type *expr)
278 return expr->hash;
281 /* Hash table equality function for expressions. */
283 inline bool
284 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
286 return expr1->hash == expr2->hash
287 && operand_equal_p (expr1->expr, expr2->expr, 0);
290 struct ivopts_data
292 /* The currently optimized loop. */
293 struct loop *current_loop;
295 /* Numbers of iterations for all exits of the current loop. */
296 hash_map<edge, tree_niter_desc *> *niters;
298 /* Number of registers used in it. */
299 unsigned regs_used;
301 /* The size of version_info array allocated. */
302 unsigned version_info_size;
304 /* The array of information for the ssa names. */
305 struct version_info *version_info;
307 /* The hashtable of loop invariant expressions created
308 by ivopt. */
309 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
311 /* Loop invariant expression id. */
312 int inv_expr_id;
314 /* The bitmap of indices in version_info whose value was changed. */
315 bitmap relevant;
317 /* The uses of induction variables. */
318 vec<iv_use_p> iv_uses;
320 /* The candidates. */
321 vec<iv_cand_p> iv_candidates;
323 /* A bitmap of important candidates. */
324 bitmap important_candidates;
326 /* The maximum invariant id. */
327 unsigned max_inv_id;
329 /* Whether to consider just related and important candidates when replacing a
330 use. */
331 bool consider_all_candidates;
333 /* Are we optimizing for speed? */
334 bool speed;
336 /* Whether the loop body includes any function calls. */
337 bool body_includes_call;
339 /* Whether the loop body can only be exited via single exit. */
340 bool loop_single_exit_p;
343 /* An assignment of iv candidates to uses. */
345 struct iv_ca
347 /* The number of uses covered by the assignment. */
348 unsigned upto;
350 /* Number of uses that cannot be expressed by the candidates in the set. */
351 unsigned bad_uses;
353 /* Candidate assigned to a use, together with the related costs. */
354 struct cost_pair **cand_for_use;
356 /* Number of times each candidate is used. */
357 unsigned *n_cand_uses;
359 /* The candidates used. */
360 bitmap cands;
362 /* The number of candidates in the set. */
363 unsigned n_cands;
365 /* Total number of registers needed. */
366 unsigned n_regs;
368 /* Total cost of expressing uses. */
369 comp_cost cand_use_cost;
371 /* Total cost of candidates. */
372 unsigned cand_cost;
374 /* Number of times each invariant is used. */
375 unsigned *n_invariant_uses;
377 /* The array holding the number of uses of each loop
378 invariant expressions created by ivopt. */
379 unsigned *used_inv_expr;
381 /* The number of created loop invariants. */
382 unsigned num_used_inv_expr;
384 /* Total cost of the assignment. */
385 comp_cost cost;
388 /* Difference of two iv candidate assignments. */
390 struct iv_ca_delta
392 /* Changed use. */
393 struct iv_use *use;
395 /* An old assignment (for rollback purposes). */
396 struct cost_pair *old_cp;
398 /* A new assignment. */
399 struct cost_pair *new_cp;
401 /* Next change in the list. */
402 struct iv_ca_delta *next_change;
405 /* Bound on number of candidates below that all candidates are considered. */
407 #define CONSIDER_ALL_CANDIDATES_BOUND \
408 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
410 /* If there are more iv occurrences, we just give up (it is quite unlikely that
411 optimizing such a loop would help, and it would take ages). */
413 #define MAX_CONSIDERED_USES \
414 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
416 /* If there are at most this number of ivs in the set, try removing unnecessary
417 ivs from the set always. */
419 #define ALWAYS_PRUNE_CAND_SET_BOUND \
420 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
422 /* The list of trees for that the decl_rtl field must be reset is stored
423 here. */
425 static vec<tree> decl_rtl_to_reset;
427 static comp_cost force_expr_to_var_cost (tree, bool);
429 /* Number of uses recorded in DATA. */
431 static inline unsigned
432 n_iv_uses (struct ivopts_data *data)
434 return data->iv_uses.length ();
437 /* Ith use recorded in DATA. */
439 static inline struct iv_use *
440 iv_use (struct ivopts_data *data, unsigned i)
442 return data->iv_uses[i];
445 /* Number of candidates recorded in DATA. */
447 static inline unsigned
448 n_iv_cands (struct ivopts_data *data)
450 return data->iv_candidates.length ();
453 /* Ith candidate recorded in DATA. */
455 static inline struct iv_cand *
456 iv_cand (struct ivopts_data *data, unsigned i)
458 return data->iv_candidates[i];
461 /* The single loop exit if it dominates the latch, NULL otherwise. */
463 edge
464 single_dom_exit (struct loop *loop)
466 edge exit = single_exit (loop);
468 if (!exit)
469 return NULL;
471 if (!just_once_each_iteration_p (loop, exit->src))
472 return NULL;
474 return exit;
477 /* Dumps information about the induction variable IV to FILE. */
479 void
480 dump_iv (FILE *file, struct iv *iv)
482 if (iv->ssa_name)
484 fprintf (file, "ssa name ");
485 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
486 fprintf (file, "\n");
489 fprintf (file, " type ");
490 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
491 fprintf (file, "\n");
493 if (iv->step)
495 fprintf (file, " base ");
496 print_generic_expr (file, iv->base, TDF_SLIM);
497 fprintf (file, "\n");
499 fprintf (file, " step ");
500 print_generic_expr (file, iv->step, TDF_SLIM);
501 fprintf (file, "\n");
503 else
505 fprintf (file, " invariant ");
506 print_generic_expr (file, iv->base, TDF_SLIM);
507 fprintf (file, "\n");
510 if (iv->base_object)
512 fprintf (file, " base object ");
513 print_generic_expr (file, iv->base_object, TDF_SLIM);
514 fprintf (file, "\n");
517 if (iv->biv_p)
518 fprintf (file, " is a biv\n");
521 /* Dumps information about the USE to FILE. */
523 void
524 dump_use (FILE *file, struct iv_use *use)
526 fprintf (file, "use %d\n", use->id);
528 switch (use->type)
530 case USE_NONLINEAR_EXPR:
531 fprintf (file, " generic\n");
532 break;
534 case USE_ADDRESS:
535 fprintf (file, " address\n");
536 break;
538 case USE_COMPARE:
539 fprintf (file, " compare\n");
540 break;
542 default:
543 gcc_unreachable ();
546 fprintf (file, " in statement ");
547 print_gimple_stmt (file, use->stmt, 0, 0);
548 fprintf (file, "\n");
550 fprintf (file, " at position ");
551 if (use->op_p)
552 print_generic_expr (file, *use->op_p, TDF_SLIM);
553 fprintf (file, "\n");
555 dump_iv (file, use->iv);
557 if (use->related_cands)
559 fprintf (file, " related candidates ");
560 dump_bitmap (file, use->related_cands);
564 /* Dumps information about the uses to FILE. */
566 void
567 dump_uses (FILE *file, struct ivopts_data *data)
569 unsigned i;
570 struct iv_use *use;
572 for (i = 0; i < n_iv_uses (data); i++)
574 use = iv_use (data, i);
576 dump_use (file, use);
577 fprintf (file, "\n");
581 /* Dumps information about induction variable candidate CAND to FILE. */
583 void
584 dump_cand (FILE *file, struct iv_cand *cand)
586 struct iv *iv = cand->iv;
588 fprintf (file, "candidate %d%s\n",
589 cand->id, cand->important ? " (important)" : "");
591 if (cand->depends_on)
593 fprintf (file, " depends on ");
594 dump_bitmap (file, cand->depends_on);
597 if (!iv)
599 fprintf (file, " final value replacement\n");
600 return;
603 if (cand->var_before)
605 fprintf (file, " var_before ");
606 print_generic_expr (file, cand->var_before, TDF_SLIM);
607 fprintf (file, "\n");
609 if (cand->var_after)
611 fprintf (file, " var_after ");
612 print_generic_expr (file, cand->var_after, TDF_SLIM);
613 fprintf (file, "\n");
616 switch (cand->pos)
618 case IP_NORMAL:
619 fprintf (file, " incremented before exit test\n");
620 break;
622 case IP_BEFORE_USE:
623 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
624 break;
626 case IP_AFTER_USE:
627 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
628 break;
630 case IP_END:
631 fprintf (file, " incremented at end\n");
632 break;
634 case IP_ORIGINAL:
635 fprintf (file, " original biv\n");
636 break;
639 dump_iv (file, iv);
642 /* Returns the info for ssa version VER. */
644 static inline struct version_info *
645 ver_info (struct ivopts_data *data, unsigned ver)
647 return data->version_info + ver;
650 /* Returns the info for ssa name NAME. */
652 static inline struct version_info *
653 name_info (struct ivopts_data *data, tree name)
655 return ver_info (data, SSA_NAME_VERSION (name));
658 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
659 emitted in LOOP. */
661 static bool
662 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
664 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
666 gcc_assert (bb);
668 if (sbb == loop->latch)
669 return true;
671 if (sbb != bb)
672 return false;
674 return stmt == last_stmt (bb);
677 /* Returns true if STMT if after the place where the original induction
678 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
679 if the positions are identical. */
681 static bool
682 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
684 basic_block cand_bb = gimple_bb (cand->incremented_at);
685 basic_block stmt_bb = gimple_bb (stmt);
687 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
688 return false;
690 if (stmt_bb != cand_bb)
691 return true;
693 if (true_if_equal
694 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
695 return true;
696 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
699 /* Returns true if STMT if after the place where the induction variable
700 CAND is incremented in LOOP. */
702 static bool
703 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
705 switch (cand->pos)
707 case IP_END:
708 return false;
710 case IP_NORMAL:
711 return stmt_after_ip_normal_pos (loop, stmt);
713 case IP_ORIGINAL:
714 case IP_AFTER_USE:
715 return stmt_after_inc_pos (cand, stmt, false);
717 case IP_BEFORE_USE:
718 return stmt_after_inc_pos (cand, stmt, true);
720 default:
721 gcc_unreachable ();
725 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
727 static bool
728 abnormal_ssa_name_p (tree exp)
730 if (!exp)
731 return false;
733 if (TREE_CODE (exp) != SSA_NAME)
734 return false;
736 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
739 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
740 abnormal phi node. Callback for for_each_index. */
742 static bool
743 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
744 void *data ATTRIBUTE_UNUSED)
746 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
748 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
749 return false;
750 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
751 return false;
754 return !abnormal_ssa_name_p (*index);
757 /* Returns true if EXPR contains a ssa name that occurs in an
758 abnormal phi node. */
760 bool
761 contains_abnormal_ssa_name_p (tree expr)
763 enum tree_code code;
764 enum tree_code_class codeclass;
766 if (!expr)
767 return false;
769 code = TREE_CODE (expr);
770 codeclass = TREE_CODE_CLASS (code);
772 if (code == SSA_NAME)
773 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
775 if (code == INTEGER_CST
776 || is_gimple_min_invariant (expr))
777 return false;
779 if (code == ADDR_EXPR)
780 return !for_each_index (&TREE_OPERAND (expr, 0),
781 idx_contains_abnormal_ssa_name_p,
782 NULL);
784 if (code == COND_EXPR)
785 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
786 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
787 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
789 switch (codeclass)
791 case tcc_binary:
792 case tcc_comparison:
793 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
794 return true;
796 /* Fallthru. */
797 case tcc_unary:
798 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
799 return true;
801 break;
803 default:
804 gcc_unreachable ();
807 return false;
810 /* Returns the structure describing number of iterations determined from
811 EXIT of DATA->current_loop, or NULL if something goes wrong. */
813 static struct tree_niter_desc *
814 niter_for_exit (struct ivopts_data *data, edge exit)
816 struct tree_niter_desc *desc;
817 tree_niter_desc **slot;
819 if (!data->niters)
821 data->niters = new hash_map<edge, tree_niter_desc *>;
822 slot = NULL;
824 else
825 slot = data->niters->get (exit);
827 if (!slot)
829 /* Try to determine number of iterations. We cannot safely work with ssa
830 names that appear in phi nodes on abnormal edges, so that we do not
831 create overlapping life ranges for them (PR 27283). */
832 desc = XNEW (struct tree_niter_desc);
833 if (!number_of_iterations_exit (data->current_loop,
834 exit, desc, true)
835 || contains_abnormal_ssa_name_p (desc->niter))
837 XDELETE (desc);
838 desc = NULL;
840 data->niters->put (exit, desc);
842 else
843 desc = *slot;
845 return desc;
848 /* Returns the structure describing number of iterations determined from
849 single dominating exit of DATA->current_loop, or NULL if something
850 goes wrong. */
852 static struct tree_niter_desc *
853 niter_for_single_dom_exit (struct ivopts_data *data)
855 edge exit = single_dom_exit (data->current_loop);
857 if (!exit)
858 return NULL;
860 return niter_for_exit (data, exit);
863 /* Initializes data structures used by the iv optimization pass, stored
864 in DATA. */
866 static void
867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
869 data->version_info_size = 2 * num_ssa_names;
870 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
871 data->relevant = BITMAP_ALLOC (NULL);
872 data->important_candidates = BITMAP_ALLOC (NULL);
873 data->max_inv_id = 0;
874 data->niters = NULL;
875 data->iv_uses.create (20);
876 data->iv_candidates.create (20);
877 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
878 data->inv_expr_id = 0;
879 decl_rtl_to_reset.create (20);
882 /* Returns a memory object to that EXPR points. In case we are able to
883 determine that it does not point to any such object, NULL is returned. */
885 static tree
886 determine_base_object (tree expr)
888 enum tree_code code = TREE_CODE (expr);
889 tree base, obj;
891 /* If this is a pointer casted to any type, we need to determine
892 the base object for the pointer; so handle conversions before
893 throwing away non-pointer expressions. */
894 if (CONVERT_EXPR_P (expr))
895 return determine_base_object (TREE_OPERAND (expr, 0));
897 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
898 return NULL_TREE;
900 switch (code)
902 case INTEGER_CST:
903 return NULL_TREE;
905 case ADDR_EXPR:
906 obj = TREE_OPERAND (expr, 0);
907 base = get_base_address (obj);
909 if (!base)
910 return expr;
912 if (TREE_CODE (base) == MEM_REF)
913 return determine_base_object (TREE_OPERAND (base, 0));
915 return fold_convert (ptr_type_node,
916 build_fold_addr_expr (base));
918 case POINTER_PLUS_EXPR:
919 return determine_base_object (TREE_OPERAND (expr, 0));
921 case PLUS_EXPR:
922 case MINUS_EXPR:
923 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
924 gcc_unreachable ();
926 default:
927 return fold_convert (ptr_type_node, expr);
931 /* Return true if address expression with non-DECL_P operand appears
932 in EXPR. */
934 static bool
935 contain_complex_addr_expr (tree expr)
937 bool res = false;
939 STRIP_NOPS (expr);
940 switch (TREE_CODE (expr))
942 case POINTER_PLUS_EXPR:
943 case PLUS_EXPR:
944 case MINUS_EXPR:
945 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
946 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
947 break;
949 case ADDR_EXPR:
950 return (!DECL_P (TREE_OPERAND (expr, 0)));
952 default:
953 return false;
956 return res;
959 /* Allocates an induction variable with given initial value BASE and step STEP
960 for loop LOOP. */
962 static struct iv *
963 alloc_iv (tree base, tree step)
965 tree expr = base;
966 struct iv *iv = XCNEW (struct iv);
967 gcc_assert (step != NULL_TREE);
969 /* Lower address expression in base except ones with DECL_P as operand.
970 By doing this:
971 1) More accurate cost can be computed for address expressions;
972 2) Duplicate candidates won't be created for bases in different
973 forms, like &a[0] and &a. */
974 STRIP_NOPS (expr);
975 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
976 || contain_complex_addr_expr (expr))
978 aff_tree comb;
979 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
980 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
983 iv->base = base;
984 iv->base_object = determine_base_object (base);
985 iv->step = step;
986 iv->biv_p = false;
987 iv->have_use_for = false;
988 iv->use_id = 0;
989 iv->ssa_name = NULL_TREE;
991 return iv;
994 /* Sets STEP and BASE for induction variable IV. */
996 static void
997 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
999 struct version_info *info = name_info (data, iv);
1001 gcc_assert (!info->iv);
1003 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1004 info->iv = alloc_iv (base, step);
1005 info->iv->ssa_name = iv;
1008 /* Finds induction variable declaration for VAR. */
1010 static struct iv *
1011 get_iv (struct ivopts_data *data, tree var)
1013 basic_block bb;
1014 tree type = TREE_TYPE (var);
1016 if (!POINTER_TYPE_P (type)
1017 && !INTEGRAL_TYPE_P (type))
1018 return NULL;
1020 if (!name_info (data, var)->iv)
1022 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1024 if (!bb
1025 || !flow_bb_inside_loop_p (data->current_loop, bb))
1026 set_iv (data, var, var, build_int_cst (type, 0));
1029 return name_info (data, var)->iv;
1032 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1033 not define a simple affine biv with nonzero step. */
1035 static tree
1036 determine_biv_step (gimple phi)
1038 struct loop *loop = gimple_bb (phi)->loop_father;
1039 tree name = PHI_RESULT (phi);
1040 affine_iv iv;
1042 if (virtual_operand_p (name))
1043 return NULL_TREE;
1045 if (!simple_iv (loop, loop, name, &iv, true))
1046 return NULL_TREE;
1048 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1051 /* Finds basic ivs. */
1053 static bool
1054 find_bivs (struct ivopts_data *data)
1056 gimple phi;
1057 tree step, type, base;
1058 bool found = false;
1059 struct loop *loop = data->current_loop;
1060 gimple_stmt_iterator psi;
1062 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1064 phi = gsi_stmt (psi);
1066 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1067 continue;
1069 step = determine_biv_step (phi);
1070 if (!step)
1071 continue;
1073 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1074 base = expand_simple_operations (base);
1075 if (contains_abnormal_ssa_name_p (base)
1076 || contains_abnormal_ssa_name_p (step))
1077 continue;
1079 type = TREE_TYPE (PHI_RESULT (phi));
1080 base = fold_convert (type, base);
1081 if (step)
1083 if (POINTER_TYPE_P (type))
1084 step = convert_to_ptrofftype (step);
1085 else
1086 step = fold_convert (type, step);
1089 set_iv (data, PHI_RESULT (phi), base, step);
1090 found = true;
1093 return found;
1096 /* Marks basic ivs. */
1098 static void
1099 mark_bivs (struct ivopts_data *data)
1101 gimple phi, def;
1102 tree var;
1103 struct iv *iv, *incr_iv;
1104 struct loop *loop = data->current_loop;
1105 basic_block incr_bb;
1106 gimple_stmt_iterator psi;
1108 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1110 phi = gsi_stmt (psi);
1112 iv = get_iv (data, PHI_RESULT (phi));
1113 if (!iv)
1114 continue;
1116 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1117 def = SSA_NAME_DEF_STMT (var);
1118 /* Don't mark iv peeled from other one as biv. */
1119 if (def
1120 && gimple_code (def) == GIMPLE_PHI
1121 && gimple_bb (def) == loop->header)
1122 continue;
1124 incr_iv = get_iv (data, var);
1125 if (!incr_iv)
1126 continue;
1128 /* If the increment is in the subloop, ignore it. */
1129 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1130 if (incr_bb->loop_father != data->current_loop
1131 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1132 continue;
1134 iv->biv_p = true;
1135 incr_iv->biv_p = true;
1139 /* Checks whether STMT defines a linear induction variable and stores its
1140 parameters to IV. */
1142 static bool
1143 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1145 tree lhs;
1146 struct loop *loop = data->current_loop;
1148 iv->base = NULL_TREE;
1149 iv->step = NULL_TREE;
1151 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1152 return false;
1154 lhs = gimple_assign_lhs (stmt);
1155 if (TREE_CODE (lhs) != SSA_NAME)
1156 return false;
1158 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1159 return false;
1160 iv->base = expand_simple_operations (iv->base);
1162 if (contains_abnormal_ssa_name_p (iv->base)
1163 || contains_abnormal_ssa_name_p (iv->step))
1164 return false;
1166 /* If STMT could throw, then do not consider STMT as defining a GIV.
1167 While this will suppress optimizations, we can not safely delete this
1168 GIV and associated statements, even if it appears it is not used. */
1169 if (stmt_could_throw_p (stmt))
1170 return false;
1172 return true;
1175 /* Finds general ivs in statement STMT. */
1177 static void
1178 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1180 affine_iv iv;
1182 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1183 return;
1185 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1188 /* Finds general ivs in basic block BB. */
1190 static void
1191 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1193 gimple_stmt_iterator bsi;
1195 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1196 find_givs_in_stmt (data, gsi_stmt (bsi));
1199 /* Finds general ivs. */
1201 static void
1202 find_givs (struct ivopts_data *data)
1204 struct loop *loop = data->current_loop;
1205 basic_block *body = get_loop_body_in_dom_order (loop);
1206 unsigned i;
1208 for (i = 0; i < loop->num_nodes; i++)
1209 find_givs_in_bb (data, body[i]);
1210 free (body);
1213 /* For each ssa name defined in LOOP determines whether it is an induction
1214 variable and if so, its initial value and step. */
1216 static bool
1217 find_induction_variables (struct ivopts_data *data)
1219 unsigned i;
1220 bitmap_iterator bi;
1222 if (!find_bivs (data))
1223 return false;
1225 find_givs (data);
1226 mark_bivs (data);
1228 if (dump_file && (dump_flags & TDF_DETAILS))
1230 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1232 if (niter)
1234 fprintf (dump_file, " number of iterations ");
1235 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1236 if (!integer_zerop (niter->may_be_zero))
1238 fprintf (dump_file, "; zero if ");
1239 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1241 fprintf (dump_file, "\n\n");
1244 fprintf (dump_file, "Induction variables:\n\n");
1246 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1248 if (ver_info (data, i)->iv)
1249 dump_iv (dump_file, ver_info (data, i)->iv);
1253 return true;
1256 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1258 static struct iv_use *
1259 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1260 gimple stmt, enum use_type use_type)
1262 struct iv_use *use = XCNEW (struct iv_use);
1264 use->id = n_iv_uses (data);
1265 use->type = use_type;
1266 use->iv = iv;
1267 use->stmt = stmt;
1268 use->op_p = use_p;
1269 use->related_cands = BITMAP_ALLOC (NULL);
1271 /* To avoid showing ssa name in the dumps, if it was not reset by the
1272 caller. */
1273 iv->ssa_name = NULL_TREE;
1275 if (dump_file && (dump_flags & TDF_DETAILS))
1276 dump_use (dump_file, use);
1278 data->iv_uses.safe_push (use);
1280 return use;
1283 /* Checks whether OP is a loop-level invariant and if so, records it.
1284 NONLINEAR_USE is true if the invariant is used in a way we do not
1285 handle specially. */
1287 static void
1288 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1290 basic_block bb;
1291 struct version_info *info;
1293 if (TREE_CODE (op) != SSA_NAME
1294 || virtual_operand_p (op))
1295 return;
1297 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1298 if (bb
1299 && flow_bb_inside_loop_p (data->current_loop, bb))
1300 return;
1302 info = name_info (data, op);
1303 info->name = op;
1304 info->has_nonlin_use |= nonlinear_use;
1305 if (!info->inv_id)
1306 info->inv_id = ++data->max_inv_id;
1307 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1310 /* Checks whether the use OP is interesting and if so, records it. */
1312 static struct iv_use *
1313 find_interesting_uses_op (struct ivopts_data *data, tree op)
1315 struct iv *iv;
1316 struct iv *civ;
1317 gimple stmt;
1318 struct iv_use *use;
1320 if (TREE_CODE (op) != SSA_NAME)
1321 return NULL;
1323 iv = get_iv (data, op);
1324 if (!iv)
1325 return NULL;
1327 if (iv->have_use_for)
1329 use = iv_use (data, iv->use_id);
1331 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1332 return use;
1335 if (integer_zerop (iv->step))
1337 record_invariant (data, op, true);
1338 return NULL;
1340 iv->have_use_for = true;
1342 civ = XNEW (struct iv);
1343 *civ = *iv;
1345 stmt = SSA_NAME_DEF_STMT (op);
1346 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1347 || is_gimple_assign (stmt));
1349 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1350 iv->use_id = use->id;
1352 return use;
1355 /* Given a condition in statement STMT, checks whether it is a compare
1356 of an induction variable and an invariant. If this is the case,
1357 CONTROL_VAR is set to location of the iv, BOUND to the location of
1358 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1359 induction variable descriptions, and true is returned. If this is not
1360 the case, CONTROL_VAR and BOUND are set to the arguments of the
1361 condition and false is returned. */
1363 static bool
1364 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1365 tree **control_var, tree **bound,
1366 struct iv **iv_var, struct iv **iv_bound)
1368 /* The objects returned when COND has constant operands. */
1369 static struct iv const_iv;
1370 static tree zero;
1371 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1372 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1373 bool ret = false;
1375 if (gimple_code (stmt) == GIMPLE_COND)
1377 op0 = gimple_cond_lhs_ptr (stmt);
1378 op1 = gimple_cond_rhs_ptr (stmt);
1380 else
1382 op0 = gimple_assign_rhs1_ptr (stmt);
1383 op1 = gimple_assign_rhs2_ptr (stmt);
1386 zero = integer_zero_node;
1387 const_iv.step = integer_zero_node;
1389 if (TREE_CODE (*op0) == SSA_NAME)
1390 iv0 = get_iv (data, *op0);
1391 if (TREE_CODE (*op1) == SSA_NAME)
1392 iv1 = get_iv (data, *op1);
1394 /* Exactly one of the compared values must be an iv, and the other one must
1395 be an invariant. */
1396 if (!iv0 || !iv1)
1397 goto end;
1399 if (integer_zerop (iv0->step))
1401 /* Control variable may be on the other side. */
1402 tmp_op = op0; op0 = op1; op1 = tmp_op;
1403 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1405 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1407 end:
1408 if (control_var)
1409 *control_var = op0;;
1410 if (iv_var)
1411 *iv_var = iv0;;
1412 if (bound)
1413 *bound = op1;
1414 if (iv_bound)
1415 *iv_bound = iv1;
1417 return ret;
1420 /* Checks whether the condition in STMT is interesting and if so,
1421 records it. */
1423 static void
1424 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1426 tree *var_p, *bound_p;
1427 struct iv *var_iv, *civ;
1429 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1431 find_interesting_uses_op (data, *var_p);
1432 find_interesting_uses_op (data, *bound_p);
1433 return;
1436 civ = XNEW (struct iv);
1437 *civ = *var_iv;
1438 record_use (data, NULL, civ, stmt, USE_COMPARE);
1441 /* Returns the outermost loop EXPR is obviously invariant in
1442 relative to the loop LOOP, i.e. if all its operands are defined
1443 outside of the returned loop. Returns NULL if EXPR is not
1444 even obviously invariant in LOOP. */
1446 struct loop *
1447 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1449 basic_block def_bb;
1450 unsigned i, len;
1452 if (is_gimple_min_invariant (expr))
1453 return current_loops->tree_root;
1455 if (TREE_CODE (expr) == SSA_NAME)
1457 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1458 if (def_bb)
1460 if (flow_bb_inside_loop_p (loop, def_bb))
1461 return NULL;
1462 return superloop_at_depth (loop,
1463 loop_depth (def_bb->loop_father) + 1);
1466 return current_loops->tree_root;
1469 if (!EXPR_P (expr))
1470 return NULL;
1472 unsigned maxdepth = 0;
1473 len = TREE_OPERAND_LENGTH (expr);
1474 for (i = 0; i < len; i++)
1476 struct loop *ivloop;
1477 if (!TREE_OPERAND (expr, i))
1478 continue;
1480 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1481 if (!ivloop)
1482 return NULL;
1483 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1486 return superloop_at_depth (loop, maxdepth);
1489 /* Returns true if expression EXPR is obviously invariant in LOOP,
1490 i.e. if all its operands are defined outside of the LOOP. LOOP
1491 should not be the function body. */
1493 bool
1494 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1496 basic_block def_bb;
1497 unsigned i, len;
1499 gcc_assert (loop_depth (loop) > 0);
1501 if (is_gimple_min_invariant (expr))
1502 return true;
1504 if (TREE_CODE (expr) == SSA_NAME)
1506 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1507 if (def_bb
1508 && flow_bb_inside_loop_p (loop, def_bb))
1509 return false;
1511 return true;
1514 if (!EXPR_P (expr))
1515 return false;
1517 len = TREE_OPERAND_LENGTH (expr);
1518 for (i = 0; i < len; i++)
1519 if (TREE_OPERAND (expr, i)
1520 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1521 return false;
1523 return true;
1526 /* Cumulates the steps of indices into DATA and replaces their values with the
1527 initial ones. Returns false when the value of the index cannot be determined.
1528 Callback for for_each_index. */
1530 struct ifs_ivopts_data
1532 struct ivopts_data *ivopts_data;
1533 gimple stmt;
1534 tree step;
1537 static bool
1538 idx_find_step (tree base, tree *idx, void *data)
1540 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1541 struct iv *iv;
1542 tree step, iv_base, iv_step, lbound, off;
1543 struct loop *loop = dta->ivopts_data->current_loop;
1545 /* If base is a component ref, require that the offset of the reference
1546 be invariant. */
1547 if (TREE_CODE (base) == COMPONENT_REF)
1549 off = component_ref_field_offset (base);
1550 return expr_invariant_in_loop_p (loop, off);
1553 /* If base is array, first check whether we will be able to move the
1554 reference out of the loop (in order to take its address in strength
1555 reduction). In order for this to work we need both lower bound
1556 and step to be loop invariants. */
1557 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1559 /* Moreover, for a range, the size needs to be invariant as well. */
1560 if (TREE_CODE (base) == ARRAY_RANGE_REF
1561 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1562 return false;
1564 step = array_ref_element_size (base);
1565 lbound = array_ref_low_bound (base);
1567 if (!expr_invariant_in_loop_p (loop, step)
1568 || !expr_invariant_in_loop_p (loop, lbound))
1569 return false;
1572 if (TREE_CODE (*idx) != SSA_NAME)
1573 return true;
1575 iv = get_iv (dta->ivopts_data, *idx);
1576 if (!iv)
1577 return false;
1579 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1580 *&x[0], which is not folded and does not trigger the
1581 ARRAY_REF path below. */
1582 *idx = iv->base;
1584 if (integer_zerop (iv->step))
1585 return true;
1587 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1589 step = array_ref_element_size (base);
1591 /* We only handle addresses whose step is an integer constant. */
1592 if (TREE_CODE (step) != INTEGER_CST)
1593 return false;
1595 else
1596 /* The step for pointer arithmetics already is 1 byte. */
1597 step = size_one_node;
1599 iv_base = iv->base;
1600 iv_step = iv->step;
1601 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1602 sizetype, &iv_base, &iv_step, dta->stmt,
1603 false))
1605 /* The index might wrap. */
1606 return false;
1609 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1610 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1612 return true;
1615 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1616 object is passed to it in DATA. */
1618 static bool
1619 idx_record_use (tree base, tree *idx,
1620 void *vdata)
1622 struct ivopts_data *data = (struct ivopts_data *) vdata;
1623 find_interesting_uses_op (data, *idx);
1624 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1626 find_interesting_uses_op (data, array_ref_element_size (base));
1627 find_interesting_uses_op (data, array_ref_low_bound (base));
1629 return true;
1632 /* If we can prove that TOP = cst * BOT for some constant cst,
1633 store cst to MUL and return true. Otherwise return false.
1634 The returned value is always sign-extended, regardless of the
1635 signedness of TOP and BOT. */
1637 static bool
1638 constant_multiple_of (tree top, tree bot, widest_int *mul)
1640 tree mby;
1641 enum tree_code code;
1642 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1643 widest_int res, p0, p1;
1645 STRIP_NOPS (top);
1646 STRIP_NOPS (bot);
1648 if (operand_equal_p (top, bot, 0))
1650 *mul = 1;
1651 return true;
1654 code = TREE_CODE (top);
1655 switch (code)
1657 case MULT_EXPR:
1658 mby = TREE_OPERAND (top, 1);
1659 if (TREE_CODE (mby) != INTEGER_CST)
1660 return false;
1662 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1663 return false;
1665 *mul = wi::sext (res * wi::to_widest (mby), precision);
1666 return true;
1668 case PLUS_EXPR:
1669 case MINUS_EXPR:
1670 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1671 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1672 return false;
1674 if (code == MINUS_EXPR)
1675 p1 = -p1;
1676 *mul = wi::sext (p0 + p1, precision);
1677 return true;
1679 case INTEGER_CST:
1680 if (TREE_CODE (bot) != INTEGER_CST)
1681 return false;
1683 p0 = widest_int::from (top, SIGNED);
1684 p1 = widest_int::from (bot, SIGNED);
1685 if (p1 == 0)
1686 return false;
1687 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1688 return res == 0;
1690 default:
1691 return false;
1695 /* Return true if memory reference REF with step STEP may be unaligned. */
1697 static bool
1698 may_be_unaligned_p (tree ref, tree step)
1700 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1701 thus they are not misaligned. */
1702 if (TREE_CODE (ref) == TARGET_MEM_REF)
1703 return false;
1705 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1706 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1707 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1709 unsigned HOST_WIDE_INT bitpos;
1710 unsigned int ref_align;
1711 get_object_alignment_1 (ref, &ref_align, &bitpos);
1712 if (ref_align < align
1713 || (bitpos % align) != 0
1714 || (bitpos % BITS_PER_UNIT) != 0)
1715 return true;
1717 unsigned int trailing_zeros = tree_ctz (step);
1718 if (trailing_zeros < HOST_BITS_PER_INT
1719 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1720 return true;
1722 return false;
1725 /* Return true if EXPR may be non-addressable. */
1727 bool
1728 may_be_nonaddressable_p (tree expr)
1730 switch (TREE_CODE (expr))
1732 case TARGET_MEM_REF:
1733 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1734 target, thus they are always addressable. */
1735 return false;
1737 case COMPONENT_REF:
1738 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1739 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1741 case VIEW_CONVERT_EXPR:
1742 /* This kind of view-conversions may wrap non-addressable objects
1743 and make them look addressable. After some processing the
1744 non-addressability may be uncovered again, causing ADDR_EXPRs
1745 of inappropriate objects to be built. */
1746 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1747 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1748 return true;
1750 /* ... fall through ... */
1752 case ARRAY_REF:
1753 case ARRAY_RANGE_REF:
1754 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1756 CASE_CONVERT:
1757 return true;
1759 default:
1760 break;
1763 return false;
1766 /* Finds addresses in *OP_P inside STMT. */
1768 static void
1769 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1771 tree base = *op_p, step = size_zero_node;
1772 struct iv *civ;
1773 struct ifs_ivopts_data ifs_ivopts_data;
1775 /* Do not play with volatile memory references. A bit too conservative,
1776 perhaps, but safe. */
1777 if (gimple_has_volatile_ops (stmt))
1778 goto fail;
1780 /* Ignore bitfields for now. Not really something terribly complicated
1781 to handle. TODO. */
1782 if (TREE_CODE (base) == BIT_FIELD_REF)
1783 goto fail;
1785 base = unshare_expr (base);
1787 if (TREE_CODE (base) == TARGET_MEM_REF)
1789 tree type = build_pointer_type (TREE_TYPE (base));
1790 tree astep;
1792 if (TMR_BASE (base)
1793 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1795 civ = get_iv (data, TMR_BASE (base));
1796 if (!civ)
1797 goto fail;
1799 TMR_BASE (base) = civ->base;
1800 step = civ->step;
1802 if (TMR_INDEX2 (base)
1803 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1805 civ = get_iv (data, TMR_INDEX2 (base));
1806 if (!civ)
1807 goto fail;
1809 TMR_INDEX2 (base) = civ->base;
1810 step = civ->step;
1812 if (TMR_INDEX (base)
1813 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1815 civ = get_iv (data, TMR_INDEX (base));
1816 if (!civ)
1817 goto fail;
1819 TMR_INDEX (base) = civ->base;
1820 astep = civ->step;
1822 if (astep)
1824 if (TMR_STEP (base))
1825 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1827 step = fold_build2 (PLUS_EXPR, type, step, astep);
1831 if (integer_zerop (step))
1832 goto fail;
1833 base = tree_mem_ref_addr (type, base);
1835 else
1837 ifs_ivopts_data.ivopts_data = data;
1838 ifs_ivopts_data.stmt = stmt;
1839 ifs_ivopts_data.step = size_zero_node;
1840 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1841 || integer_zerop (ifs_ivopts_data.step))
1842 goto fail;
1843 step = ifs_ivopts_data.step;
1845 /* Check that the base expression is addressable. This needs
1846 to be done after substituting bases of IVs into it. */
1847 if (may_be_nonaddressable_p (base))
1848 goto fail;
1850 /* Moreover, on strict alignment platforms, check that it is
1851 sufficiently aligned. */
1852 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1853 goto fail;
1855 base = build_fold_addr_expr (base);
1857 /* Substituting bases of IVs into the base expression might
1858 have caused folding opportunities. */
1859 if (TREE_CODE (base) == ADDR_EXPR)
1861 tree *ref = &TREE_OPERAND (base, 0);
1862 while (handled_component_p (*ref))
1863 ref = &TREE_OPERAND (*ref, 0);
1864 if (TREE_CODE (*ref) == MEM_REF)
1866 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1867 TREE_OPERAND (*ref, 0),
1868 TREE_OPERAND (*ref, 1));
1869 if (tem)
1870 *ref = tem;
1875 civ = alloc_iv (base, step);
1876 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1877 return;
1879 fail:
1880 for_each_index (op_p, idx_record_use, data);
1883 /* Finds and records invariants used in STMT. */
1885 static void
1886 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1888 ssa_op_iter iter;
1889 use_operand_p use_p;
1890 tree op;
1892 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1894 op = USE_FROM_PTR (use_p);
1895 record_invariant (data, op, false);
1899 /* Finds interesting uses of induction variables in the statement STMT. */
1901 static void
1902 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1904 struct iv *iv;
1905 tree op, *lhs, *rhs;
1906 ssa_op_iter iter;
1907 use_operand_p use_p;
1908 enum tree_code code;
1910 find_invariants_stmt (data, stmt);
1912 if (gimple_code (stmt) == GIMPLE_COND)
1914 find_interesting_uses_cond (data, stmt);
1915 return;
1918 if (is_gimple_assign (stmt))
1920 lhs = gimple_assign_lhs_ptr (stmt);
1921 rhs = gimple_assign_rhs1_ptr (stmt);
1923 if (TREE_CODE (*lhs) == SSA_NAME)
1925 /* If the statement defines an induction variable, the uses are not
1926 interesting by themselves. */
1928 iv = get_iv (data, *lhs);
1930 if (iv && !integer_zerop (iv->step))
1931 return;
1934 code = gimple_assign_rhs_code (stmt);
1935 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1936 && (REFERENCE_CLASS_P (*rhs)
1937 || is_gimple_val (*rhs)))
1939 if (REFERENCE_CLASS_P (*rhs))
1940 find_interesting_uses_address (data, stmt, rhs);
1941 else
1942 find_interesting_uses_op (data, *rhs);
1944 if (REFERENCE_CLASS_P (*lhs))
1945 find_interesting_uses_address (data, stmt, lhs);
1946 return;
1948 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1950 find_interesting_uses_cond (data, stmt);
1951 return;
1954 /* TODO -- we should also handle address uses of type
1956 memory = call (whatever);
1960 call (memory). */
1963 if (gimple_code (stmt) == GIMPLE_PHI
1964 && gimple_bb (stmt) == data->current_loop->header)
1966 iv = get_iv (data, PHI_RESULT (stmt));
1968 if (iv && !integer_zerop (iv->step))
1969 return;
1972 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1974 op = USE_FROM_PTR (use_p);
1976 if (TREE_CODE (op) != SSA_NAME)
1977 continue;
1979 iv = get_iv (data, op);
1980 if (!iv)
1981 continue;
1983 find_interesting_uses_op (data, op);
1987 /* Finds interesting uses of induction variables outside of loops
1988 on loop exit edge EXIT. */
1990 static void
1991 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1993 gimple phi;
1994 gimple_stmt_iterator psi;
1995 tree def;
1997 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1999 phi = gsi_stmt (psi);
2000 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2001 if (!virtual_operand_p (def))
2002 find_interesting_uses_op (data, def);
2006 /* Finds uses of the induction variables that are interesting. */
2008 static void
2009 find_interesting_uses (struct ivopts_data *data)
2011 basic_block bb;
2012 gimple_stmt_iterator bsi;
2013 basic_block *body = get_loop_body (data->current_loop);
2014 unsigned i;
2015 struct version_info *info;
2016 edge e;
2018 if (dump_file && (dump_flags & TDF_DETAILS))
2019 fprintf (dump_file, "Uses:\n\n");
2021 for (i = 0; i < data->current_loop->num_nodes; i++)
2023 edge_iterator ei;
2024 bb = body[i];
2026 FOR_EACH_EDGE (e, ei, bb->succs)
2027 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2028 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2029 find_interesting_uses_outside (data, e);
2031 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2032 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2033 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2034 if (!is_gimple_debug (gsi_stmt (bsi)))
2035 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2040 bitmap_iterator bi;
2042 fprintf (dump_file, "\n");
2044 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2046 info = ver_info (data, i);
2047 if (info->inv_id)
2049 fprintf (dump_file, " ");
2050 print_generic_expr (dump_file, info->name, TDF_SLIM);
2051 fprintf (dump_file, " is invariant (%d)%s\n",
2052 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2056 fprintf (dump_file, "\n");
2059 free (body);
2062 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2063 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2064 we are at the top-level of the processed address. */
2066 static tree
2067 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2068 HOST_WIDE_INT *offset)
2070 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2071 enum tree_code code;
2072 tree type, orig_type = TREE_TYPE (expr);
2073 HOST_WIDE_INT off0, off1, st;
2074 tree orig_expr = expr;
2076 STRIP_NOPS (expr);
2078 type = TREE_TYPE (expr);
2079 code = TREE_CODE (expr);
2080 *offset = 0;
2082 switch (code)
2084 case INTEGER_CST:
2085 if (!cst_and_fits_in_hwi (expr)
2086 || integer_zerop (expr))
2087 return orig_expr;
2089 *offset = int_cst_value (expr);
2090 return build_int_cst (orig_type, 0);
2092 case POINTER_PLUS_EXPR:
2093 case PLUS_EXPR:
2094 case MINUS_EXPR:
2095 op0 = TREE_OPERAND (expr, 0);
2096 op1 = TREE_OPERAND (expr, 1);
2098 op0 = strip_offset_1 (op0, false, false, &off0);
2099 op1 = strip_offset_1 (op1, false, false, &off1);
2101 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2102 if (op0 == TREE_OPERAND (expr, 0)
2103 && op1 == TREE_OPERAND (expr, 1))
2104 return orig_expr;
2106 if (integer_zerop (op1))
2107 expr = op0;
2108 else if (integer_zerop (op0))
2110 if (code == MINUS_EXPR)
2111 expr = fold_build1 (NEGATE_EXPR, type, op1);
2112 else
2113 expr = op1;
2115 else
2116 expr = fold_build2 (code, type, op0, op1);
2118 return fold_convert (orig_type, expr);
2120 case MULT_EXPR:
2121 op1 = TREE_OPERAND (expr, 1);
2122 if (!cst_and_fits_in_hwi (op1))
2123 return orig_expr;
2125 op0 = TREE_OPERAND (expr, 0);
2126 op0 = strip_offset_1 (op0, false, false, &off0);
2127 if (op0 == TREE_OPERAND (expr, 0))
2128 return orig_expr;
2130 *offset = off0 * int_cst_value (op1);
2131 if (integer_zerop (op0))
2132 expr = op0;
2133 else
2134 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2136 return fold_convert (orig_type, expr);
2138 case ARRAY_REF:
2139 case ARRAY_RANGE_REF:
2140 if (!inside_addr)
2141 return orig_expr;
2143 step = array_ref_element_size (expr);
2144 if (!cst_and_fits_in_hwi (step))
2145 break;
2147 st = int_cst_value (step);
2148 op1 = TREE_OPERAND (expr, 1);
2149 op1 = strip_offset_1 (op1, false, false, &off1);
2150 *offset = off1 * st;
2152 if (top_compref
2153 && integer_zerop (op1))
2155 /* Strip the component reference completely. */
2156 op0 = TREE_OPERAND (expr, 0);
2157 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2158 *offset += off0;
2159 return op0;
2161 break;
2163 case COMPONENT_REF:
2165 tree field;
2167 if (!inside_addr)
2168 return orig_expr;
2170 tmp = component_ref_field_offset (expr);
2171 field = TREE_OPERAND (expr, 1);
2172 if (top_compref
2173 && cst_and_fits_in_hwi (tmp)
2174 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2176 HOST_WIDE_INT boffset, abs_off;
2178 /* Strip the component reference completely. */
2179 op0 = TREE_OPERAND (expr, 0);
2180 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2181 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2182 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2183 if (boffset < 0)
2184 abs_off = -abs_off;
2186 *offset = off0 + int_cst_value (tmp) + abs_off;
2187 return op0;
2190 break;
2192 case ADDR_EXPR:
2193 op0 = TREE_OPERAND (expr, 0);
2194 op0 = strip_offset_1 (op0, true, true, &off0);
2195 *offset += off0;
2197 if (op0 == TREE_OPERAND (expr, 0))
2198 return orig_expr;
2200 expr = build_fold_addr_expr (op0);
2201 return fold_convert (orig_type, expr);
2203 case MEM_REF:
2204 /* ??? Offset operand? */
2205 inside_addr = false;
2206 break;
2208 default:
2209 return orig_expr;
2212 /* Default handling of expressions for that we want to recurse into
2213 the first operand. */
2214 op0 = TREE_OPERAND (expr, 0);
2215 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2216 *offset += off0;
2218 if (op0 == TREE_OPERAND (expr, 0)
2219 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2220 return orig_expr;
2222 expr = copy_node (expr);
2223 TREE_OPERAND (expr, 0) = op0;
2224 if (op1)
2225 TREE_OPERAND (expr, 1) = op1;
2227 /* Inside address, we might strip the top level component references,
2228 thus changing type of the expression. Handling of ADDR_EXPR
2229 will fix that. */
2230 expr = fold_convert (orig_type, expr);
2232 return expr;
2235 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2237 static tree
2238 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2240 HOST_WIDE_INT off;
2241 tree core = strip_offset_1 (expr, false, false, &off);
2242 *offset = off;
2243 return core;
2246 /* Returns variant of TYPE that can be used as base for different uses.
2247 We return unsigned type with the same precision, which avoids problems
2248 with overflows. */
2250 static tree
2251 generic_type_for (tree type)
2253 if (POINTER_TYPE_P (type))
2254 return unsigned_type_for (type);
2256 if (TYPE_UNSIGNED (type))
2257 return type;
2259 return unsigned_type_for (type);
2262 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2263 the bitmap to that we should store it. */
2265 static struct ivopts_data *fd_ivopts_data;
2266 static tree
2267 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2269 bitmap *depends_on = (bitmap *) data;
2270 struct version_info *info;
2272 if (TREE_CODE (*expr_p) != SSA_NAME)
2273 return NULL_TREE;
2274 info = name_info (fd_ivopts_data, *expr_p);
2276 if (!info->inv_id || info->has_nonlin_use)
2277 return NULL_TREE;
2279 if (!*depends_on)
2280 *depends_on = BITMAP_ALLOC (NULL);
2281 bitmap_set_bit (*depends_on, info->inv_id);
2283 return NULL_TREE;
2286 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2287 position to POS. If USE is not NULL, the candidate is set as related to
2288 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2289 replacement of the final value of the iv by a direct computation. */
2291 static struct iv_cand *
2292 add_candidate_1 (struct ivopts_data *data,
2293 tree base, tree step, bool important, enum iv_position pos,
2294 struct iv_use *use, gimple incremented_at)
2296 unsigned i;
2297 struct iv_cand *cand = NULL;
2298 tree type, orig_type;
2300 /* For non-original variables, make sure their values are computed in a type
2301 that does not invoke undefined behavior on overflows (since in general,
2302 we cannot prove that these induction variables are non-wrapping). */
2303 if (pos != IP_ORIGINAL)
2305 orig_type = TREE_TYPE (base);
2306 type = generic_type_for (orig_type);
2307 if (type != orig_type)
2309 base = fold_convert (type, base);
2310 step = fold_convert (type, step);
2314 for (i = 0; i < n_iv_cands (data); i++)
2316 cand = iv_cand (data, i);
2318 if (cand->pos != pos)
2319 continue;
2321 if (cand->incremented_at != incremented_at
2322 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2323 && cand->ainc_use != use))
2324 continue;
2326 if (!cand->iv)
2328 if (!base && !step)
2329 break;
2331 continue;
2334 if (!base && !step)
2335 continue;
2337 if (operand_equal_p (base, cand->iv->base, 0)
2338 && operand_equal_p (step, cand->iv->step, 0)
2339 && (TYPE_PRECISION (TREE_TYPE (base))
2340 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2341 break;
2344 if (i == n_iv_cands (data))
2346 cand = XCNEW (struct iv_cand);
2347 cand->id = i;
2349 if (!base && !step)
2350 cand->iv = NULL;
2351 else
2352 cand->iv = alloc_iv (base, step);
2354 cand->pos = pos;
2355 if (pos != IP_ORIGINAL && cand->iv)
2357 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2358 cand->var_after = cand->var_before;
2360 cand->important = important;
2361 cand->incremented_at = incremented_at;
2362 data->iv_candidates.safe_push (cand);
2364 if (step
2365 && TREE_CODE (step) != INTEGER_CST)
2367 fd_ivopts_data = data;
2368 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2371 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2372 cand->ainc_use = use;
2373 else
2374 cand->ainc_use = NULL;
2376 if (dump_file && (dump_flags & TDF_DETAILS))
2377 dump_cand (dump_file, cand);
2380 if (important && !cand->important)
2382 cand->important = true;
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2387 if (use)
2389 bitmap_set_bit (use->related_cands, i);
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 fprintf (dump_file, "Candidate %d is related to use %d\n",
2392 cand->id, use->id);
2395 return cand;
2398 /* Returns true if incrementing the induction variable at the end of the LOOP
2399 is allowed.
2401 The purpose is to avoid splitting latch edge with a biv increment, thus
2402 creating a jump, possibly confusing other optimization passes and leaving
2403 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2404 is not available (so we do not have a better alternative), or if the latch
2405 edge is already nonempty. */
2407 static bool
2408 allow_ip_end_pos_p (struct loop *loop)
2410 if (!ip_normal_pos (loop))
2411 return true;
2413 if (!empty_block_p (ip_end_pos (loop)))
2414 return true;
2416 return false;
2419 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2420 Important field is set to IMPORTANT. */
2422 static void
2423 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2424 bool important, struct iv_use *use)
2426 basic_block use_bb = gimple_bb (use->stmt);
2427 enum machine_mode mem_mode;
2428 unsigned HOST_WIDE_INT cstepi;
2430 /* If we insert the increment in any position other than the standard
2431 ones, we must ensure that it is incremented once per iteration.
2432 It must not be in an inner nested loop, or one side of an if
2433 statement. */
2434 if (use_bb->loop_father != data->current_loop
2435 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2436 || stmt_could_throw_p (use->stmt)
2437 || !cst_and_fits_in_hwi (step))
2438 return;
2440 cstepi = int_cst_value (step);
2442 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2443 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2444 || USE_STORE_PRE_INCREMENT (mem_mode))
2445 && GET_MODE_SIZE (mem_mode) == cstepi)
2446 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2447 || USE_STORE_PRE_DECREMENT (mem_mode))
2448 && GET_MODE_SIZE (mem_mode) == -cstepi))
2450 enum tree_code code = MINUS_EXPR;
2451 tree new_base;
2452 tree new_step = step;
2454 if (POINTER_TYPE_P (TREE_TYPE (base)))
2456 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2457 code = POINTER_PLUS_EXPR;
2459 else
2460 new_step = fold_convert (TREE_TYPE (base), new_step);
2461 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2462 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2463 use->stmt);
2465 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2466 || USE_STORE_POST_INCREMENT (mem_mode))
2467 && GET_MODE_SIZE (mem_mode) == cstepi)
2468 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2469 || USE_STORE_POST_DECREMENT (mem_mode))
2470 && GET_MODE_SIZE (mem_mode) == -cstepi))
2472 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2473 use->stmt);
2477 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2478 position to POS. If USE is not NULL, the candidate is set as related to
2479 it. The candidate computation is scheduled on all available positions. */
2481 static void
2482 add_candidate (struct ivopts_data *data,
2483 tree base, tree step, bool important, struct iv_use *use)
2485 if (ip_normal_pos (data->current_loop))
2486 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2487 if (ip_end_pos (data->current_loop)
2488 && allow_ip_end_pos_p (data->current_loop))
2489 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2491 if (use != NULL && use->type == USE_ADDRESS)
2492 add_autoinc_candidates (data, base, step, important, use);
2495 /* Adds standard iv candidates. */
2497 static void
2498 add_standard_iv_candidates (struct ivopts_data *data)
2500 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2502 /* The same for a double-integer type if it is still fast enough. */
2503 if (TYPE_PRECISION
2504 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2505 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2506 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2507 build_int_cst (long_integer_type_node, 1), true, NULL);
2509 /* The same for a double-integer type if it is still fast enough. */
2510 if (TYPE_PRECISION
2511 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2512 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2513 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2514 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2518 /* Adds candidates bases on the old induction variable IV. */
2520 static void
2521 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2523 gimple phi;
2524 tree def;
2525 struct iv_cand *cand;
2527 add_candidate (data, iv->base, iv->step, true, NULL);
2529 /* The same, but with initial value zero. */
2530 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2531 add_candidate (data, size_int (0), iv->step, true, NULL);
2532 else
2533 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2534 iv->step, true, NULL);
2536 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2537 if (gimple_code (phi) == GIMPLE_PHI)
2539 /* Additionally record the possibility of leaving the original iv
2540 untouched. */
2541 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2542 /* Don't add candidate if it's from another PHI node because
2543 it's an affine iv appearing in the form of PEELED_CHREC. */
2544 phi = SSA_NAME_DEF_STMT (def);
2545 if (gimple_code (phi) != GIMPLE_PHI)
2547 cand = add_candidate_1 (data,
2548 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2549 SSA_NAME_DEF_STMT (def));
2550 cand->var_before = iv->ssa_name;
2551 cand->var_after = def;
2553 else
2554 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2558 /* Adds candidates based on the old induction variables. */
2560 static void
2561 add_old_ivs_candidates (struct ivopts_data *data)
2563 unsigned i;
2564 struct iv *iv;
2565 bitmap_iterator bi;
2567 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2569 iv = ver_info (data, i)->iv;
2570 if (iv && iv->biv_p && !integer_zerop (iv->step))
2571 add_old_iv_candidates (data, iv);
2575 /* Adds candidates based on the value of the induction variable IV and USE. */
2577 static void
2578 add_iv_value_candidates (struct ivopts_data *data,
2579 struct iv *iv, struct iv_use *use)
2581 unsigned HOST_WIDE_INT offset;
2582 tree base;
2583 tree basetype;
2585 add_candidate (data, iv->base, iv->step, false, use);
2587 /* The same, but with initial value zero. Make such variable important,
2588 since it is generic enough so that possibly many uses may be based
2589 on it. */
2590 basetype = TREE_TYPE (iv->base);
2591 if (POINTER_TYPE_P (basetype))
2592 basetype = sizetype;
2593 add_candidate (data, build_int_cst (basetype, 0),
2594 iv->step, true, use);
2596 /* Third, try removing the constant offset. Make sure to even
2597 add a candidate for &a[0] vs. (T *)&a. */
2598 base = strip_offset (iv->base, &offset);
2599 if (offset
2600 || base != iv->base)
2601 add_candidate (data, base, iv->step, false, use);
2604 /* Adds candidates based on the uses. */
2606 static void
2607 add_derived_ivs_candidates (struct ivopts_data *data)
2609 unsigned i;
2611 for (i = 0; i < n_iv_uses (data); i++)
2613 struct iv_use *use = iv_use (data, i);
2615 if (!use)
2616 continue;
2618 switch (use->type)
2620 case USE_NONLINEAR_EXPR:
2621 case USE_COMPARE:
2622 case USE_ADDRESS:
2623 /* Just add the ivs based on the value of the iv used here. */
2624 add_iv_value_candidates (data, use->iv, use);
2625 break;
2627 default:
2628 gcc_unreachable ();
2633 /* Record important candidates and add them to related_cands bitmaps
2634 if needed. */
2636 static void
2637 record_important_candidates (struct ivopts_data *data)
2639 unsigned i;
2640 struct iv_use *use;
2642 for (i = 0; i < n_iv_cands (data); i++)
2644 struct iv_cand *cand = iv_cand (data, i);
2646 if (cand->important)
2647 bitmap_set_bit (data->important_candidates, i);
2650 data->consider_all_candidates = (n_iv_cands (data)
2651 <= CONSIDER_ALL_CANDIDATES_BOUND);
2653 if (data->consider_all_candidates)
2655 /* We will not need "related_cands" bitmaps in this case,
2656 so release them to decrease peak memory consumption. */
2657 for (i = 0; i < n_iv_uses (data); i++)
2659 use = iv_use (data, i);
2660 BITMAP_FREE (use->related_cands);
2663 else
2665 /* Add important candidates to the related_cands bitmaps. */
2666 for (i = 0; i < n_iv_uses (data); i++)
2667 bitmap_ior_into (iv_use (data, i)->related_cands,
2668 data->important_candidates);
2672 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2673 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2674 we allocate a simple list to every use. */
2676 static void
2677 alloc_use_cost_map (struct ivopts_data *data)
2679 unsigned i, size, s;
2681 for (i = 0; i < n_iv_uses (data); i++)
2683 struct iv_use *use = iv_use (data, i);
2685 if (data->consider_all_candidates)
2686 size = n_iv_cands (data);
2687 else
2689 s = bitmap_count_bits (use->related_cands);
2691 /* Round up to the power of two, so that moduling by it is fast. */
2692 size = s ? (1 << ceil_log2 (s)) : 1;
2695 use->n_map_members = size;
2696 use->cost_map = XCNEWVEC (struct cost_pair, size);
2700 /* Returns description of computation cost of expression whose runtime
2701 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2703 static comp_cost
2704 new_cost (unsigned runtime, unsigned complexity)
2706 comp_cost cost;
2708 cost.cost = runtime;
2709 cost.complexity = complexity;
2711 return cost;
2714 /* Adds costs COST1 and COST2. */
2716 static comp_cost
2717 add_costs (comp_cost cost1, comp_cost cost2)
2719 cost1.cost += cost2.cost;
2720 cost1.complexity += cost2.complexity;
2722 return cost1;
2724 /* Subtracts costs COST1 and COST2. */
2726 static comp_cost
2727 sub_costs (comp_cost cost1, comp_cost cost2)
2729 cost1.cost -= cost2.cost;
2730 cost1.complexity -= cost2.complexity;
2732 return cost1;
2735 /* Returns a negative number if COST1 < COST2, a positive number if
2736 COST1 > COST2, and 0 if COST1 = COST2. */
2738 static int
2739 compare_costs (comp_cost cost1, comp_cost cost2)
2741 if (cost1.cost == cost2.cost)
2742 return cost1.complexity - cost2.complexity;
2744 return cost1.cost - cost2.cost;
2747 /* Returns true if COST is infinite. */
2749 static bool
2750 infinite_cost_p (comp_cost cost)
2752 return cost.cost == INFTY;
2755 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2756 on invariants DEPENDS_ON and that the value used in expressing it
2757 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2759 static void
2760 set_use_iv_cost (struct ivopts_data *data,
2761 struct iv_use *use, struct iv_cand *cand,
2762 comp_cost cost, bitmap depends_on, tree value,
2763 enum tree_code comp, int inv_expr_id)
2765 unsigned i, s;
2767 if (infinite_cost_p (cost))
2769 BITMAP_FREE (depends_on);
2770 return;
2773 if (data->consider_all_candidates)
2775 use->cost_map[cand->id].cand = cand;
2776 use->cost_map[cand->id].cost = cost;
2777 use->cost_map[cand->id].depends_on = depends_on;
2778 use->cost_map[cand->id].value = value;
2779 use->cost_map[cand->id].comp = comp;
2780 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2781 return;
2784 /* n_map_members is a power of two, so this computes modulo. */
2785 s = cand->id & (use->n_map_members - 1);
2786 for (i = s; i < use->n_map_members; i++)
2787 if (!use->cost_map[i].cand)
2788 goto found;
2789 for (i = 0; i < s; i++)
2790 if (!use->cost_map[i].cand)
2791 goto found;
2793 gcc_unreachable ();
2795 found:
2796 use->cost_map[i].cand = cand;
2797 use->cost_map[i].cost = cost;
2798 use->cost_map[i].depends_on = depends_on;
2799 use->cost_map[i].value = value;
2800 use->cost_map[i].comp = comp;
2801 use->cost_map[i].inv_expr_id = inv_expr_id;
2804 /* Gets cost of (USE, CANDIDATE) pair. */
2806 static struct cost_pair *
2807 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2808 struct iv_cand *cand)
2810 unsigned i, s;
2811 struct cost_pair *ret;
2813 if (!cand)
2814 return NULL;
2816 if (data->consider_all_candidates)
2818 ret = use->cost_map + cand->id;
2819 if (!ret->cand)
2820 return NULL;
2822 return ret;
2825 /* n_map_members is a power of two, so this computes modulo. */
2826 s = cand->id & (use->n_map_members - 1);
2827 for (i = s; i < use->n_map_members; i++)
2828 if (use->cost_map[i].cand == cand)
2829 return use->cost_map + i;
2830 else if (use->cost_map[i].cand == NULL)
2831 return NULL;
2832 for (i = 0; i < s; i++)
2833 if (use->cost_map[i].cand == cand)
2834 return use->cost_map + i;
2835 else if (use->cost_map[i].cand == NULL)
2836 return NULL;
2838 return NULL;
2841 /* Returns estimate on cost of computing SEQ. */
2843 static unsigned
2844 seq_cost (rtx seq, bool speed)
2846 unsigned cost = 0;
2847 rtx set;
2849 for (; seq; seq = NEXT_INSN (seq))
2851 set = single_set (seq);
2852 if (set)
2853 cost += set_src_cost (SET_SRC (set), speed);
2854 else
2855 cost++;
2858 return cost;
2861 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2862 static rtx
2863 produce_memory_decl_rtl (tree obj, int *regno)
2865 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2866 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2867 rtx x;
2869 gcc_assert (obj);
2870 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2872 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2873 x = gen_rtx_SYMBOL_REF (address_mode, name);
2874 SET_SYMBOL_REF_DECL (x, obj);
2875 x = gen_rtx_MEM (DECL_MODE (obj), x);
2876 set_mem_addr_space (x, as);
2877 targetm.encode_section_info (obj, x, true);
2879 else
2881 x = gen_raw_REG (address_mode, (*regno)++);
2882 x = gen_rtx_MEM (DECL_MODE (obj), x);
2883 set_mem_addr_space (x, as);
2886 return x;
2889 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2890 walk_tree. DATA contains the actual fake register number. */
2892 static tree
2893 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2895 tree obj = NULL_TREE;
2896 rtx x = NULL_RTX;
2897 int *regno = (int *) data;
2899 switch (TREE_CODE (*expr_p))
2901 case ADDR_EXPR:
2902 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2903 handled_component_p (*expr_p);
2904 expr_p = &TREE_OPERAND (*expr_p, 0))
2905 continue;
2906 obj = *expr_p;
2907 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2908 x = produce_memory_decl_rtl (obj, regno);
2909 break;
2911 case SSA_NAME:
2912 *ws = 0;
2913 obj = SSA_NAME_VAR (*expr_p);
2914 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2915 if (!obj)
2916 return NULL_TREE;
2917 if (!DECL_RTL_SET_P (obj))
2918 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2919 break;
2921 case VAR_DECL:
2922 case PARM_DECL:
2923 case RESULT_DECL:
2924 *ws = 0;
2925 obj = *expr_p;
2927 if (DECL_RTL_SET_P (obj))
2928 break;
2930 if (DECL_MODE (obj) == BLKmode)
2931 x = produce_memory_decl_rtl (obj, regno);
2932 else
2933 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2935 break;
2937 default:
2938 break;
2941 if (x)
2943 decl_rtl_to_reset.safe_push (obj);
2944 SET_DECL_RTL (obj, x);
2947 return NULL_TREE;
2950 /* Determines cost of the computation of EXPR. */
2952 static unsigned
2953 computation_cost (tree expr, bool speed)
2955 rtx seq, rslt;
2956 tree type = TREE_TYPE (expr);
2957 unsigned cost;
2958 /* Avoid using hard regs in ways which may be unsupported. */
2959 int regno = LAST_VIRTUAL_REGISTER + 1;
2960 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2961 enum node_frequency real_frequency = node->frequency;
2963 node->frequency = NODE_FREQUENCY_NORMAL;
2964 crtl->maybe_hot_insn_p = speed;
2965 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2966 start_sequence ();
2967 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2968 seq = get_insns ();
2969 end_sequence ();
2970 default_rtl_profile ();
2971 node->frequency = real_frequency;
2973 cost = seq_cost (seq, speed);
2974 if (MEM_P (rslt))
2975 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2976 TYPE_ADDR_SPACE (type), speed);
2977 else if (!REG_P (rslt))
2978 cost += set_src_cost (rslt, speed);
2980 return cost;
2983 /* Returns variable containing the value of candidate CAND at statement AT. */
2985 static tree
2986 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2988 if (stmt_after_increment (loop, cand, stmt))
2989 return cand->var_after;
2990 else
2991 return cand->var_before;
2994 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2995 same precision that is at least as wide as the precision of TYPE, stores
2996 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2997 type of A and B. */
2999 static tree
3000 determine_common_wider_type (tree *a, tree *b)
3002 tree wider_type = NULL;
3003 tree suba, subb;
3004 tree atype = TREE_TYPE (*a);
3006 if (CONVERT_EXPR_P (*a))
3008 suba = TREE_OPERAND (*a, 0);
3009 wider_type = TREE_TYPE (suba);
3010 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3011 return atype;
3013 else
3014 return atype;
3016 if (CONVERT_EXPR_P (*b))
3018 subb = TREE_OPERAND (*b, 0);
3019 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3020 return atype;
3022 else
3023 return atype;
3025 *a = suba;
3026 *b = subb;
3027 return wider_type;
3030 /* Determines the expression by that USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The expression is stored in a decomposed
3032 form into AFF. Returns false if USE cannot be expressed using CAND. */
3034 static bool
3035 get_computation_aff (struct loop *loop,
3036 struct iv_use *use, struct iv_cand *cand, gimple at,
3037 struct aff_tree *aff)
3039 tree ubase = use->iv->base;
3040 tree ustep = use->iv->step;
3041 tree cbase = cand->iv->base;
3042 tree cstep = cand->iv->step, cstep_common;
3043 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3044 tree common_type, var;
3045 tree uutype;
3046 aff_tree cbase_aff, var_aff;
3047 widest_int rat;
3049 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051 /* We do not have a precision to express the values of use. */
3052 return false;
3055 var = var_at_stmt (loop, cand, at);
3056 uutype = unsigned_type_for (utype);
3058 /* If the conversion is not noop, perform it. */
3059 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3061 cstep = fold_convert (uutype, cstep);
3062 cbase = fold_convert (uutype, cbase);
3063 var = fold_convert (uutype, var);
3066 if (!constant_multiple_of (ustep, cstep, &rat))
3067 return false;
3069 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3070 type, we achieve better folding by computing their difference in this
3071 wider type, and cast the result to UUTYPE. We do not need to worry about
3072 overflows, as all the arithmetics will in the end be performed in UUTYPE
3073 anyway. */
3074 common_type = determine_common_wider_type (&ubase, &cbase);
3076 /* use = ubase - ratio * cbase + ratio * var. */
3077 tree_to_aff_combination (ubase, common_type, aff);
3078 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3079 tree_to_aff_combination (var, uutype, &var_aff);
3081 /* We need to shift the value if we are after the increment. */
3082 if (stmt_after_increment (loop, cand, at))
3084 aff_tree cstep_aff;
3086 if (common_type != uutype)
3087 cstep_common = fold_convert (common_type, cstep);
3088 else
3089 cstep_common = cstep;
3091 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3092 aff_combination_add (&cbase_aff, &cstep_aff);
3095 aff_combination_scale (&cbase_aff, -rat);
3096 aff_combination_add (aff, &cbase_aff);
3097 if (common_type != uutype)
3098 aff_combination_convert (aff, uutype);
3100 aff_combination_scale (&var_aff, rat);
3101 aff_combination_add (aff, &var_aff);
3103 return true;
3106 /* Return the type of USE. */
3108 static tree
3109 get_use_type (struct iv_use *use)
3111 tree base_type = TREE_TYPE (use->iv->base);
3112 tree type;
3114 if (use->type == USE_ADDRESS)
3116 /* The base_type may be a void pointer. Create a pointer type based on
3117 the mem_ref instead. */
3118 type = build_pointer_type (TREE_TYPE (*use->op_p));
3119 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3120 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3122 else
3123 type = base_type;
3125 return type;
3128 /* Determines the expression by that USE is expressed from induction variable
3129 CAND at statement AT in LOOP. The computation is unshared. */
3131 static tree
3132 get_computation_at (struct loop *loop,
3133 struct iv_use *use, struct iv_cand *cand, gimple at)
3135 aff_tree aff;
3136 tree type = get_use_type (use);
3138 if (!get_computation_aff (loop, use, cand, at, &aff))
3139 return NULL_TREE;
3140 unshare_aff_combination (&aff);
3141 return fold_convert (type, aff_combination_to_tree (&aff));
3144 /* Determines the expression by that USE is expressed from induction variable
3145 CAND in LOOP. The computation is unshared. */
3147 static tree
3148 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3150 return get_computation_at (loop, use, cand, use->stmt);
3153 /* Adjust the cost COST for being in loop setup rather than loop body.
3154 If we're optimizing for space, the loop setup overhead is constant;
3155 if we're optimizing for speed, amortize it over the per-iteration cost. */
3156 static unsigned
3157 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3159 if (cost == INFTY)
3160 return cost;
3161 else if (optimize_loop_for_speed_p (data->current_loop))
3162 return cost / avg_loop_niter (data->current_loop);
3163 else
3164 return cost;
3167 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3168 validity for a memory reference accessing memory of mode MODE in
3169 address space AS. */
3172 bool
3173 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3174 addr_space_t as)
3176 #define MAX_RATIO 128
3177 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3178 static vec<sbitmap> valid_mult_list;
3179 sbitmap valid_mult;
3181 if (data_index >= valid_mult_list.length ())
3182 valid_mult_list.safe_grow_cleared (data_index + 1);
3184 valid_mult = valid_mult_list[data_index];
3185 if (!valid_mult)
3187 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3188 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3189 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3190 rtx addr, scaled;
3191 HOST_WIDE_INT i;
3193 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3194 bitmap_clear (valid_mult);
3195 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3196 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3197 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3199 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3200 if (memory_address_addr_space_p (mode, addr, as)
3201 || memory_address_addr_space_p (mode, scaled, as))
3202 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3207 fprintf (dump_file, " allowed multipliers:");
3208 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3209 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3210 fprintf (dump_file, " %d", (int) i);
3211 fprintf (dump_file, "\n");
3212 fprintf (dump_file, "\n");
3215 valid_mult_list[data_index] = valid_mult;
3218 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3219 return false;
3221 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3224 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3226 variable is omitted. Compute the cost for a memory reference that accesses
3227 a memory location of mode MEM_MODE in address space AS.
3229 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3230 size of MEM_MODE / RATIO) is available. To make this determination, we
3231 look at the size of the increment to be made, which is given in CSTEP.
3232 CSTEP may be zero if the step is unknown.
3233 STMT_AFTER_INC is true iff the statement we're looking at is after the
3234 increment of the original biv.
3236 TODO -- there must be some better way. This all is quite crude. */
3238 enum ainc_type
3240 AINC_PRE_INC, /* Pre increment. */
3241 AINC_PRE_DEC, /* Pre decrement. */
3242 AINC_POST_INC, /* Post increment. */
3243 AINC_POST_DEC, /* Post decrement. */
3244 AINC_NONE /* Also the number of auto increment types. */
3247 typedef struct address_cost_data_s
3249 HOST_WIDE_INT min_offset, max_offset;
3250 unsigned costs[2][2][2][2];
3251 unsigned ainc_costs[AINC_NONE];
3252 } *address_cost_data;
3255 static comp_cost
3256 get_address_cost (bool symbol_present, bool var_present,
3257 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3258 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3259 addr_space_t as, bool speed,
3260 bool stmt_after_inc, bool *may_autoinc)
3262 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3263 static vec<address_cost_data> address_cost_data_list;
3264 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3265 address_cost_data data;
3266 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3267 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3268 unsigned cost, acost, complexity;
3269 enum ainc_type autoinc_type;
3270 bool offset_p, ratio_p, autoinc;
3271 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3272 unsigned HOST_WIDE_INT mask;
3273 unsigned bits;
3275 if (data_index >= address_cost_data_list.length ())
3276 address_cost_data_list.safe_grow_cleared (data_index + 1);
3278 data = address_cost_data_list[data_index];
3279 if (!data)
3281 HOST_WIDE_INT i;
3282 HOST_WIDE_INT rat, off = 0;
3283 int old_cse_not_expected, width;
3284 unsigned sym_p, var_p, off_p, rat_p, add_c;
3285 rtx seq, addr, base;
3286 rtx reg0, reg1;
3288 data = (address_cost_data) xcalloc (1, sizeof (*data));
3290 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3292 width = GET_MODE_BITSIZE (address_mode) - 1;
3293 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3294 width = HOST_BITS_PER_WIDE_INT - 1;
3295 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3297 for (i = width; i >= 0; i--)
3299 off = -((unsigned HOST_WIDE_INT) 1 << i);
3300 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301 if (memory_address_addr_space_p (mem_mode, addr, as))
3302 break;
3304 data->min_offset = (i == -1? 0 : off);
3306 for (i = width; i >= 0; i--)
3308 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3309 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3310 if (memory_address_addr_space_p (mem_mode, addr, as))
3311 break;
3312 /* For some TARGET, like ARM THUMB1, the offset should be nature
3313 aligned. Try an aligned offset if address_mode is not QImode. */
3314 off = (address_mode == QImode)
3316 : ((unsigned HOST_WIDE_INT) 1 << i)
3317 - GET_MODE_SIZE (address_mode);
3318 if (off > 0)
3320 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3321 if (memory_address_addr_space_p (mem_mode, addr, as))
3322 break;
3325 if (i == -1)
3326 off = 0;
3327 data->max_offset = off;
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3331 fprintf (dump_file, "get_address_cost:\n");
3332 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3333 GET_MODE_NAME (mem_mode),
3334 data->min_offset);
3335 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3336 GET_MODE_NAME (mem_mode),
3337 data->max_offset);
3340 rat = 1;
3341 for (i = 2; i <= MAX_RATIO; i++)
3342 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3344 rat = i;
3345 break;
3348 /* Compute the cost of various addressing modes. */
3349 acost = 0;
3350 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3353 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3354 || USE_STORE_PRE_DECREMENT (mem_mode))
3356 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3357 has_predec[mem_mode]
3358 = memory_address_addr_space_p (mem_mode, addr, as);
3360 if (has_predec[mem_mode])
3361 data->ainc_costs[AINC_PRE_DEC]
3362 = address_cost (addr, mem_mode, as, speed);
3364 if (USE_LOAD_POST_DECREMENT (mem_mode)
3365 || USE_STORE_POST_DECREMENT (mem_mode))
3367 addr = gen_rtx_POST_DEC (address_mode, reg0);
3368 has_postdec[mem_mode]
3369 = memory_address_addr_space_p (mem_mode, addr, as);
3371 if (has_postdec[mem_mode])
3372 data->ainc_costs[AINC_POST_DEC]
3373 = address_cost (addr, mem_mode, as, speed);
3375 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3376 || USE_STORE_PRE_DECREMENT (mem_mode))
3378 addr = gen_rtx_PRE_INC (address_mode, reg0);
3379 has_preinc[mem_mode]
3380 = memory_address_addr_space_p (mem_mode, addr, as);
3382 if (has_preinc[mem_mode])
3383 data->ainc_costs[AINC_PRE_INC]
3384 = address_cost (addr, mem_mode, as, speed);
3386 if (USE_LOAD_POST_INCREMENT (mem_mode)
3387 || USE_STORE_POST_INCREMENT (mem_mode))
3389 addr = gen_rtx_POST_INC (address_mode, reg0);
3390 has_postinc[mem_mode]
3391 = memory_address_addr_space_p (mem_mode, addr, as);
3393 if (has_postinc[mem_mode])
3394 data->ainc_costs[AINC_POST_INC]
3395 = address_cost (addr, mem_mode, as, speed);
3397 for (i = 0; i < 16; i++)
3399 sym_p = i & 1;
3400 var_p = (i >> 1) & 1;
3401 off_p = (i >> 2) & 1;
3402 rat_p = (i >> 3) & 1;
3404 addr = reg0;
3405 if (rat_p)
3406 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3407 gen_int_mode (rat, address_mode));
3409 if (var_p)
3410 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3412 if (sym_p)
3414 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3415 /* ??? We can run into trouble with some backends by presenting
3416 it with symbols which haven't been properly passed through
3417 targetm.encode_section_info. By setting the local bit, we
3418 enhance the probability of things working. */
3419 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3421 if (off_p)
3422 base = gen_rtx_fmt_e (CONST, address_mode,
3423 gen_rtx_fmt_ee
3424 (PLUS, address_mode, base,
3425 gen_int_mode (off, address_mode)));
3427 else if (off_p)
3428 base = gen_int_mode (off, address_mode);
3429 else
3430 base = NULL_RTX;
3432 if (base)
3433 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3435 start_sequence ();
3436 /* To avoid splitting addressing modes, pretend that no cse will
3437 follow. */
3438 old_cse_not_expected = cse_not_expected;
3439 cse_not_expected = true;
3440 addr = memory_address_addr_space (mem_mode, addr, as);
3441 cse_not_expected = old_cse_not_expected;
3442 seq = get_insns ();
3443 end_sequence ();
3445 acost = seq_cost (seq, speed);
3446 acost += address_cost (addr, mem_mode, as, speed);
3448 if (!acost)
3449 acost = 1;
3450 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3453 /* On some targets, it is quite expensive to load symbol to a register,
3454 which makes addresses that contain symbols look much more expensive.
3455 However, the symbol will have to be loaded in any case before the
3456 loop (and quite likely we have it in register already), so it does not
3457 make much sense to penalize them too heavily. So make some final
3458 tweaks for the SYMBOL_PRESENT modes:
3460 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3461 var is cheaper, use this mode with small penalty.
3462 If VAR_PRESENT is true, try whether the mode with
3463 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3464 if this is the case, use it. */
3465 add_c = add_cost (speed, address_mode);
3466 for (i = 0; i < 8; i++)
3468 var_p = i & 1;
3469 off_p = (i >> 1) & 1;
3470 rat_p = (i >> 2) & 1;
3472 acost = data->costs[0][1][off_p][rat_p] + 1;
3473 if (var_p)
3474 acost += add_c;
3476 if (acost < data->costs[1][var_p][off_p][rat_p])
3477 data->costs[1][var_p][off_p][rat_p] = acost;
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3482 fprintf (dump_file, "Address costs:\n");
3484 for (i = 0; i < 16; i++)
3486 sym_p = i & 1;
3487 var_p = (i >> 1) & 1;
3488 off_p = (i >> 2) & 1;
3489 rat_p = (i >> 3) & 1;
3491 fprintf (dump_file, " ");
3492 if (sym_p)
3493 fprintf (dump_file, "sym + ");
3494 if (var_p)
3495 fprintf (dump_file, "var + ");
3496 if (off_p)
3497 fprintf (dump_file, "cst + ");
3498 if (rat_p)
3499 fprintf (dump_file, "rat * ");
3501 acost = data->costs[sym_p][var_p][off_p][rat_p];
3502 fprintf (dump_file, "index costs %d\n", acost);
3504 if (has_predec[mem_mode] || has_postdec[mem_mode]
3505 || has_preinc[mem_mode] || has_postinc[mem_mode])
3506 fprintf (dump_file, " May include autoinc/dec\n");
3507 fprintf (dump_file, "\n");
3510 address_cost_data_list[data_index] = data;
3513 bits = GET_MODE_BITSIZE (address_mode);
3514 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3515 offset &= mask;
3516 if ((offset >> (bits - 1) & 1))
3517 offset |= ~mask;
3518 s_offset = offset;
3520 autoinc = false;
3521 autoinc_type = AINC_NONE;
3522 msize = GET_MODE_SIZE (mem_mode);
3523 autoinc_offset = offset;
3524 if (stmt_after_inc)
3525 autoinc_offset += ratio * cstep;
3526 if (symbol_present || var_present || ratio != 1)
3527 autoinc = false;
3528 else
3530 if (has_postinc[mem_mode] && autoinc_offset == 0
3531 && msize == cstep)
3532 autoinc_type = AINC_POST_INC;
3533 else if (has_postdec[mem_mode] && autoinc_offset == 0
3534 && msize == -cstep)
3535 autoinc_type = AINC_POST_DEC;
3536 else if (has_preinc[mem_mode] && autoinc_offset == msize
3537 && msize == cstep)
3538 autoinc_type = AINC_PRE_INC;
3539 else if (has_predec[mem_mode] && autoinc_offset == -msize
3540 && msize == -cstep)
3541 autoinc_type = AINC_PRE_DEC;
3543 if (autoinc_type != AINC_NONE)
3544 autoinc = true;
3547 cost = 0;
3548 offset_p = (s_offset != 0
3549 && data->min_offset <= s_offset
3550 && s_offset <= data->max_offset);
3551 ratio_p = (ratio != 1
3552 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3554 if (ratio != 1 && !ratio_p)
3555 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3557 if (s_offset && !offset_p && !symbol_present)
3558 cost += add_cost (speed, address_mode);
3560 if (may_autoinc)
3561 *may_autoinc = autoinc;
3562 if (autoinc)
3563 acost = data->ainc_costs[autoinc_type];
3564 else
3565 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3566 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3567 return new_cost (cost + acost, complexity);
3570 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3571 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3572 calculating the operands of EXPR. Returns true if successful, and returns
3573 the cost in COST. */
3575 static bool
3576 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3577 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3579 comp_cost res;
3580 tree op1 = TREE_OPERAND (expr, 1);
3581 tree cst = TREE_OPERAND (mult, 1);
3582 tree multop = TREE_OPERAND (mult, 0);
3583 int m = exact_log2 (int_cst_value (cst));
3584 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3585 int sa_cost;
3586 bool equal_p = false;
3588 if (!(m >= 0 && m < maxm))
3589 return false;
3591 if (operand_equal_p (op1, mult, 0))
3592 equal_p = true;
3594 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3595 ? shiftadd_cost (speed, mode, m)
3596 : (equal_p
3597 ? shiftsub1_cost (speed, mode, m)
3598 : shiftsub0_cost (speed, mode, m)));
3599 res = new_cost (sa_cost, 0);
3600 res = add_costs (res, equal_p ? cost0 : cost1);
3602 STRIP_NOPS (multop);
3603 if (!is_gimple_val (multop))
3604 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3606 *cost = res;
3607 return true;
3610 /* Estimates cost of forcing expression EXPR into a variable. */
3612 static comp_cost
3613 force_expr_to_var_cost (tree expr, bool speed)
3615 static bool costs_initialized = false;
3616 static unsigned integer_cost [2];
3617 static unsigned symbol_cost [2];
3618 static unsigned address_cost [2];
3619 tree op0, op1;
3620 comp_cost cost0, cost1, cost;
3621 enum machine_mode mode;
3623 if (!costs_initialized)
3625 tree type = build_pointer_type (integer_type_node);
3626 tree var, addr;
3627 rtx x;
3628 int i;
3630 var = create_tmp_var_raw (integer_type_node, "test_var");
3631 TREE_STATIC (var) = 1;
3632 x = produce_memory_decl_rtl (var, NULL);
3633 SET_DECL_RTL (var, x);
3635 addr = build1 (ADDR_EXPR, type, var);
3638 for (i = 0; i < 2; i++)
3640 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3641 2000), i);
3643 symbol_cost[i] = computation_cost (addr, i) + 1;
3645 address_cost[i]
3646 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3649 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3650 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3651 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3652 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3653 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3654 fprintf (dump_file, "\n");
3658 costs_initialized = true;
3661 STRIP_NOPS (expr);
3663 if (SSA_VAR_P (expr))
3664 return no_cost;
3666 if (is_gimple_min_invariant (expr))
3668 if (TREE_CODE (expr) == INTEGER_CST)
3669 return new_cost (integer_cost [speed], 0);
3671 if (TREE_CODE (expr) == ADDR_EXPR)
3673 tree obj = TREE_OPERAND (expr, 0);
3675 if (TREE_CODE (obj) == VAR_DECL
3676 || TREE_CODE (obj) == PARM_DECL
3677 || TREE_CODE (obj) == RESULT_DECL)
3678 return new_cost (symbol_cost [speed], 0);
3681 return new_cost (address_cost [speed], 0);
3684 switch (TREE_CODE (expr))
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case MULT_EXPR:
3690 op0 = TREE_OPERAND (expr, 0);
3691 op1 = TREE_OPERAND (expr, 1);
3692 STRIP_NOPS (op0);
3693 STRIP_NOPS (op1);
3694 break;
3696 CASE_CONVERT:
3697 case NEGATE_EXPR:
3698 op0 = TREE_OPERAND (expr, 0);
3699 STRIP_NOPS (op0);
3700 op1 = NULL_TREE;
3701 break;
3703 default:
3704 /* Just an arbitrary value, FIXME. */
3705 return new_cost (target_spill_cost[speed], 0);
3708 if (op0 == NULL_TREE
3709 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3710 cost0 = no_cost;
3711 else
3712 cost0 = force_expr_to_var_cost (op0, speed);
3714 if (op1 == NULL_TREE
3715 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3716 cost1 = no_cost;
3717 else
3718 cost1 = force_expr_to_var_cost (op1, speed);
3720 mode = TYPE_MODE (TREE_TYPE (expr));
3721 switch (TREE_CODE (expr))
3723 case POINTER_PLUS_EXPR:
3724 case PLUS_EXPR:
3725 case MINUS_EXPR:
3726 case NEGATE_EXPR:
3727 cost = new_cost (add_cost (speed, mode), 0);
3728 if (TREE_CODE (expr) != NEGATE_EXPR)
3730 tree mult = NULL_TREE;
3731 comp_cost sa_cost;
3732 if (TREE_CODE (op1) == MULT_EXPR)
3733 mult = op1;
3734 else if (TREE_CODE (op0) == MULT_EXPR)
3735 mult = op0;
3737 if (mult != NULL_TREE
3738 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3739 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3740 speed, &sa_cost))
3741 return sa_cost;
3743 break;
3745 CASE_CONVERT:
3747 tree inner_mode, outer_mode;
3748 outer_mode = TREE_TYPE (expr);
3749 inner_mode = TREE_TYPE (op0);
3750 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3751 TYPE_MODE (inner_mode), speed), 0);
3753 break;
3755 case MULT_EXPR:
3756 if (cst_and_fits_in_hwi (op0))
3757 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3758 mode, speed), 0);
3759 else if (cst_and_fits_in_hwi (op1))
3760 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3761 mode, speed), 0);
3762 else
3763 return new_cost (target_spill_cost [speed], 0);
3764 break;
3766 default:
3767 gcc_unreachable ();
3770 cost = add_costs (cost, cost0);
3771 cost = add_costs (cost, cost1);
3773 /* Bound the cost by target_spill_cost. The parts of complicated
3774 computations often are either loop invariant or at least can
3775 be shared between several iv uses, so letting this grow without
3776 limits would not give reasonable results. */
3777 if (cost.cost > (int) target_spill_cost [speed])
3778 cost.cost = target_spill_cost [speed];
3780 return cost;
3783 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3786 static comp_cost
3787 force_var_cost (struct ivopts_data *data,
3788 tree expr, bitmap *depends_on)
3790 if (depends_on)
3792 fd_ivopts_data = data;
3793 walk_tree (&expr, find_depends, depends_on, NULL);
3796 return force_expr_to_var_cost (expr, data->speed);
3799 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3800 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3801 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3802 invariants the computation depends on. */
3804 static comp_cost
3805 split_address_cost (struct ivopts_data *data,
3806 tree addr, bool *symbol_present, bool *var_present,
3807 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3809 tree core;
3810 HOST_WIDE_INT bitsize;
3811 HOST_WIDE_INT bitpos;
3812 tree toffset;
3813 enum machine_mode mode;
3814 int unsignedp, volatilep;
3816 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3817 &unsignedp, &volatilep, false);
3819 if (toffset != 0
3820 || bitpos % BITS_PER_UNIT != 0
3821 || TREE_CODE (core) != VAR_DECL)
3823 *symbol_present = false;
3824 *var_present = true;
3825 fd_ivopts_data = data;
3826 walk_tree (&addr, find_depends, depends_on, NULL);
3827 return new_cost (target_spill_cost[data->speed], 0);
3830 *offset += bitpos / BITS_PER_UNIT;
3831 if (TREE_STATIC (core)
3832 || DECL_EXTERNAL (core))
3834 *symbol_present = true;
3835 *var_present = false;
3836 return no_cost;
3839 *symbol_present = false;
3840 *var_present = true;
3841 return no_cost;
3844 /* Estimates cost of expressing difference of addresses E1 - E2 as
3845 var + symbol + offset. The value of offset is added to OFFSET,
3846 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3847 part is missing. DEPENDS_ON is a set of the invariants the computation
3848 depends on. */
3850 static comp_cost
3851 ptr_difference_cost (struct ivopts_data *data,
3852 tree e1, tree e2, bool *symbol_present, bool *var_present,
3853 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3855 HOST_WIDE_INT diff = 0;
3856 aff_tree aff_e1, aff_e2;
3857 tree type;
3859 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3861 if (ptr_difference_const (e1, e2, &diff))
3863 *offset += diff;
3864 *symbol_present = false;
3865 *var_present = false;
3866 return no_cost;
3869 if (integer_zerop (e2))
3870 return split_address_cost (data, TREE_OPERAND (e1, 0),
3871 symbol_present, var_present, offset, depends_on);
3873 *symbol_present = false;
3874 *var_present = true;
3876 type = signed_type_for (TREE_TYPE (e1));
3877 tree_to_aff_combination (e1, type, &aff_e1);
3878 tree_to_aff_combination (e2, type, &aff_e2);
3879 aff_combination_scale (&aff_e2, -1);
3880 aff_combination_add (&aff_e1, &aff_e2);
3882 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3885 /* Estimates cost of expressing difference E1 - E2 as
3886 var + symbol + offset. The value of offset is added to OFFSET,
3887 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3888 part is missing. DEPENDS_ON is a set of the invariants the computation
3889 depends on. */
3891 static comp_cost
3892 difference_cost (struct ivopts_data *data,
3893 tree e1, tree e2, bool *symbol_present, bool *var_present,
3894 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3896 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3897 unsigned HOST_WIDE_INT off1, off2;
3898 aff_tree aff_e1, aff_e2;
3899 tree type;
3901 e1 = strip_offset (e1, &off1);
3902 e2 = strip_offset (e2, &off2);
3903 *offset += off1 - off2;
3905 STRIP_NOPS (e1);
3906 STRIP_NOPS (e2);
3908 if (TREE_CODE (e1) == ADDR_EXPR)
3909 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3910 offset, depends_on);
3911 *symbol_present = false;
3913 if (operand_equal_p (e1, e2, 0))
3915 *var_present = false;
3916 return no_cost;
3919 *var_present = true;
3921 if (integer_zerop (e2))
3922 return force_var_cost (data, e1, depends_on);
3924 if (integer_zerop (e1))
3926 comp_cost cost = force_var_cost (data, e2, depends_on);
3927 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3928 return cost;
3931 type = signed_type_for (TREE_TYPE (e1));
3932 tree_to_aff_combination (e1, type, &aff_e1);
3933 tree_to_aff_combination (e2, type, &aff_e2);
3934 aff_combination_scale (&aff_e2, -1);
3935 aff_combination_add (&aff_e1, &aff_e2);
3937 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3940 /* Returns true if AFF1 and AFF2 are identical. */
3942 static bool
3943 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3945 unsigned i;
3947 if (aff1->n != aff2->n)
3948 return false;
3950 for (i = 0; i < aff1->n; i++)
3952 if (aff1->elts[i].coef != aff2->elts[i].coef)
3953 return false;
3955 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3956 return false;
3958 return true;
3961 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3963 static int
3964 get_expr_id (struct ivopts_data *data, tree expr)
3966 struct iv_inv_expr_ent ent;
3967 struct iv_inv_expr_ent **slot;
3969 ent.expr = expr;
3970 ent.hash = iterative_hash_expr (expr, 0);
3971 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3972 if (*slot)
3973 return (*slot)->id;
3975 *slot = XNEW (struct iv_inv_expr_ent);
3976 (*slot)->expr = expr;
3977 (*slot)->hash = ent.hash;
3978 (*slot)->id = data->inv_expr_id++;
3979 return (*slot)->id;
3982 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3983 requires a new compiler generated temporary. Returns -1 otherwise.
3984 ADDRESS_P is a flag indicating if the expression is for address
3985 computation. */
3987 static int
3988 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3989 tree cbase, HOST_WIDE_INT ratio,
3990 bool address_p)
3992 aff_tree ubase_aff, cbase_aff;
3993 tree expr, ub, cb;
3995 STRIP_NOPS (ubase);
3996 STRIP_NOPS (cbase);
3997 ub = ubase;
3998 cb = cbase;
4000 if ((TREE_CODE (ubase) == INTEGER_CST)
4001 && (TREE_CODE (cbase) == INTEGER_CST))
4002 return -1;
4004 /* Strips the constant part. */
4005 if (TREE_CODE (ubase) == PLUS_EXPR
4006 || TREE_CODE (ubase) == MINUS_EXPR
4007 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4009 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4010 ubase = TREE_OPERAND (ubase, 0);
4013 /* Strips the constant part. */
4014 if (TREE_CODE (cbase) == PLUS_EXPR
4015 || TREE_CODE (cbase) == MINUS_EXPR
4016 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4018 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4019 cbase = TREE_OPERAND (cbase, 0);
4022 if (address_p)
4024 if (((TREE_CODE (ubase) == SSA_NAME)
4025 || (TREE_CODE (ubase) == ADDR_EXPR
4026 && is_gimple_min_invariant (ubase)))
4027 && (TREE_CODE (cbase) == INTEGER_CST))
4028 return -1;
4030 if (((TREE_CODE (cbase) == SSA_NAME)
4031 || (TREE_CODE (cbase) == ADDR_EXPR
4032 && is_gimple_min_invariant (cbase)))
4033 && (TREE_CODE (ubase) == INTEGER_CST))
4034 return -1;
4037 if (ratio == 1)
4039 if (operand_equal_p (ubase, cbase, 0))
4040 return -1;
4042 if (TREE_CODE (ubase) == ADDR_EXPR
4043 && TREE_CODE (cbase) == ADDR_EXPR)
4045 tree usym, csym;
4047 usym = TREE_OPERAND (ubase, 0);
4048 csym = TREE_OPERAND (cbase, 0);
4049 if (TREE_CODE (usym) == ARRAY_REF)
4051 tree ind = TREE_OPERAND (usym, 1);
4052 if (TREE_CODE (ind) == INTEGER_CST
4053 && tree_fits_shwi_p (ind)
4054 && tree_to_shwi (ind) == 0)
4055 usym = TREE_OPERAND (usym, 0);
4057 if (TREE_CODE (csym) == ARRAY_REF)
4059 tree ind = TREE_OPERAND (csym, 1);
4060 if (TREE_CODE (ind) == INTEGER_CST
4061 && tree_fits_shwi_p (ind)
4062 && tree_to_shwi (ind) == 0)
4063 csym = TREE_OPERAND (csym, 0);
4065 if (operand_equal_p (usym, csym, 0))
4066 return -1;
4068 /* Now do more complex comparison */
4069 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4070 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4071 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4072 return -1;
4075 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4076 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4078 aff_combination_scale (&cbase_aff, -1 * ratio);
4079 aff_combination_add (&ubase_aff, &cbase_aff);
4080 expr = aff_combination_to_tree (&ubase_aff);
4081 return get_expr_id (data, expr);
4086 /* Determines the cost of the computation by that USE is expressed
4087 from induction variable CAND. If ADDRESS_P is true, we just need
4088 to create an address from it, otherwise we want to get it into
4089 register. A set of invariants we depend on is stored in
4090 DEPENDS_ON. AT is the statement at that the value is computed.
4091 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4092 addressing is likely. */
4094 static comp_cost
4095 get_computation_cost_at (struct ivopts_data *data,
4096 struct iv_use *use, struct iv_cand *cand,
4097 bool address_p, bitmap *depends_on, gimple at,
4098 bool *can_autoinc,
4099 int *inv_expr_id)
4101 tree ubase = use->iv->base, ustep = use->iv->step;
4102 tree cbase, cstep;
4103 tree utype = TREE_TYPE (ubase), ctype;
4104 unsigned HOST_WIDE_INT cstepi, offset = 0;
4105 HOST_WIDE_INT ratio, aratio;
4106 bool var_present, symbol_present, stmt_is_after_inc;
4107 comp_cost cost;
4108 widest_int rat;
4109 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4110 enum machine_mode mem_mode = (address_p
4111 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4112 : VOIDmode);
4114 *depends_on = NULL;
4116 /* Only consider real candidates. */
4117 if (!cand->iv)
4118 return infinite_cost;
4120 cbase = cand->iv->base;
4121 cstep = cand->iv->step;
4122 ctype = TREE_TYPE (cbase);
4124 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4126 /* We do not have a precision to express the values of use. */
4127 return infinite_cost;
4130 if (address_p
4131 || (use->iv->base_object
4132 && cand->iv->base_object
4133 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4134 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4136 /* Do not try to express address of an object with computation based
4137 on address of a different object. This may cause problems in rtl
4138 level alias analysis (that does not expect this to be happening,
4139 as this is illegal in C), and would be unlikely to be useful
4140 anyway. */
4141 if (use->iv->base_object
4142 && cand->iv->base_object
4143 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4144 return infinite_cost;
4147 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4149 /* TODO -- add direct handling of this case. */
4150 goto fallback;
4153 /* CSTEPI is removed from the offset in case statement is after the
4154 increment. If the step is not constant, we use zero instead.
4155 This is a bit imprecise (there is the extra addition), but
4156 redundancy elimination is likely to transform the code so that
4157 it uses value of the variable before increment anyway,
4158 so it is not that much unrealistic. */
4159 if (cst_and_fits_in_hwi (cstep))
4160 cstepi = int_cst_value (cstep);
4161 else
4162 cstepi = 0;
4164 if (!constant_multiple_of (ustep, cstep, &rat))
4165 return infinite_cost;
4167 if (wi::fits_shwi_p (rat))
4168 ratio = rat.to_shwi ();
4169 else
4170 return infinite_cost;
4172 STRIP_NOPS (cbase);
4173 ctype = TREE_TYPE (cbase);
4175 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4177 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4178 or ratio == 1, it is better to handle this like
4180 ubase - ratio * cbase + ratio * var
4182 (also holds in the case ratio == -1, TODO. */
4184 if (cst_and_fits_in_hwi (cbase))
4186 offset = - ratio * int_cst_value (cbase);
4187 cost = difference_cost (data,
4188 ubase, build_int_cst (utype, 0),
4189 &symbol_present, &var_present, &offset,
4190 depends_on);
4191 cost.cost /= avg_loop_niter (data->current_loop);
4193 else if (ratio == 1)
4195 tree real_cbase = cbase;
4197 /* Check to see if any adjustment is needed. */
4198 if (cstepi == 0 && stmt_is_after_inc)
4200 aff_tree real_cbase_aff;
4201 aff_tree cstep_aff;
4203 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4204 &real_cbase_aff);
4205 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4207 aff_combination_add (&real_cbase_aff, &cstep_aff);
4208 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4211 cost = difference_cost (data,
4212 ubase, real_cbase,
4213 &symbol_present, &var_present, &offset,
4214 depends_on);
4215 cost.cost /= avg_loop_niter (data->current_loop);
4217 else if (address_p
4218 && !POINTER_TYPE_P (ctype)
4219 && multiplier_allowed_in_address_p
4220 (ratio, mem_mode,
4221 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4223 cbase
4224 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4225 cost = difference_cost (data,
4226 ubase, cbase,
4227 &symbol_present, &var_present, &offset,
4228 depends_on);
4229 cost.cost /= avg_loop_niter (data->current_loop);
4231 else
4233 cost = force_var_cost (data, cbase, depends_on);
4234 cost = add_costs (cost,
4235 difference_cost (data,
4236 ubase, build_int_cst (utype, 0),
4237 &symbol_present, &var_present,
4238 &offset, depends_on));
4239 cost.cost /= avg_loop_niter (data->current_loop);
4240 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4243 if (inv_expr_id)
4245 *inv_expr_id =
4246 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4247 /* Clear depends on. */
4248 if (*inv_expr_id != -1 && depends_on && *depends_on)
4249 bitmap_clear (*depends_on);
4252 /* If we are after the increment, the value of the candidate is higher by
4253 one iteration. */
4254 if (stmt_is_after_inc)
4255 offset -= ratio * cstepi;
4257 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4258 (symbol/var1/const parts may be omitted). If we are looking for an
4259 address, find the cost of addressing this. */
4260 if (address_p)
4261 return add_costs (cost,
4262 get_address_cost (symbol_present, var_present,
4263 offset, ratio, cstepi,
4264 mem_mode,
4265 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4266 speed, stmt_is_after_inc,
4267 can_autoinc));
4269 /* Otherwise estimate the costs for computing the expression. */
4270 if (!symbol_present && !var_present && !offset)
4272 if (ratio != 1)
4273 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4274 return cost;
4277 /* Symbol + offset should be compile-time computable so consider that they
4278 are added once to the variable, if present. */
4279 if (var_present && (symbol_present || offset))
4280 cost.cost += adjust_setup_cost (data,
4281 add_cost (speed, TYPE_MODE (ctype)));
4283 /* Having offset does not affect runtime cost in case it is added to
4284 symbol, but it increases complexity. */
4285 if (offset)
4286 cost.complexity++;
4288 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4290 aratio = ratio > 0 ? ratio : -ratio;
4291 if (aratio != 1)
4292 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4293 return cost;
4295 fallback:
4296 if (can_autoinc)
4297 *can_autoinc = false;
4300 /* Just get the expression, expand it and measure the cost. */
4301 tree comp = get_computation_at (data->current_loop, use, cand, at);
4303 if (!comp)
4304 return infinite_cost;
4306 if (address_p)
4307 comp = build_simple_mem_ref (comp);
4309 return new_cost (computation_cost (comp, speed), 0);
4313 /* Determines the cost of the computation by that USE is expressed
4314 from induction variable CAND. If ADDRESS_P is true, we just need
4315 to create an address from it, otherwise we want to get it into
4316 register. A set of invariants we depend on is stored in
4317 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4318 autoinc addressing is likely. */
4320 static comp_cost
4321 get_computation_cost (struct ivopts_data *data,
4322 struct iv_use *use, struct iv_cand *cand,
4323 bool address_p, bitmap *depends_on,
4324 bool *can_autoinc, int *inv_expr_id)
4326 return get_computation_cost_at (data,
4327 use, cand, address_p, depends_on, use->stmt,
4328 can_autoinc, inv_expr_id);
4331 /* Determines cost of basing replacement of USE on CAND in a generic
4332 expression. */
4334 static bool
4335 determine_use_iv_cost_generic (struct ivopts_data *data,
4336 struct iv_use *use, struct iv_cand *cand)
4338 bitmap depends_on;
4339 comp_cost cost;
4340 int inv_expr_id = -1;
4342 /* The simple case first -- if we need to express value of the preserved
4343 original biv, the cost is 0. This also prevents us from counting the
4344 cost of increment twice -- once at this use and once in the cost of
4345 the candidate. */
4346 if (cand->pos == IP_ORIGINAL
4347 && cand->incremented_at == use->stmt)
4349 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4350 ERROR_MARK, -1);
4351 return true;
4354 cost = get_computation_cost (data, use, cand, false, &depends_on,
4355 NULL, &inv_expr_id);
4357 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4358 inv_expr_id);
4360 return !infinite_cost_p (cost);
4363 /* Determines cost of basing replacement of USE on CAND in an address. */
4365 static bool
4366 determine_use_iv_cost_address (struct ivopts_data *data,
4367 struct iv_use *use, struct iv_cand *cand)
4369 bitmap depends_on;
4370 bool can_autoinc;
4371 int inv_expr_id = -1;
4372 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4373 &can_autoinc, &inv_expr_id);
4375 if (cand->ainc_use == use)
4377 if (can_autoinc)
4378 cost.cost -= cand->cost_step;
4379 /* If we generated the candidate solely for exploiting autoincrement
4380 opportunities, and it turns out it can't be used, set the cost to
4381 infinity to make sure we ignore it. */
4382 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4383 cost = infinite_cost;
4385 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4386 inv_expr_id);
4388 return !infinite_cost_p (cost);
4391 /* Computes value of candidate CAND at position AT in iteration NITER, and
4392 stores it to VAL. */
4394 static void
4395 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4396 aff_tree *val)
4398 aff_tree step, delta, nit;
4399 struct iv *iv = cand->iv;
4400 tree type = TREE_TYPE (iv->base);
4401 tree steptype = type;
4402 if (POINTER_TYPE_P (type))
4403 steptype = sizetype;
4404 steptype = unsigned_type_for (type);
4406 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4407 aff_combination_convert (&step, steptype);
4408 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4409 aff_combination_convert (&nit, steptype);
4410 aff_combination_mult (&nit, &step, &delta);
4411 if (stmt_after_increment (loop, cand, at))
4412 aff_combination_add (&delta, &step);
4414 tree_to_aff_combination (iv->base, type, val);
4415 if (!POINTER_TYPE_P (type))
4416 aff_combination_convert (val, steptype);
4417 aff_combination_add (val, &delta);
4420 /* Returns period of induction variable iv. */
4422 static tree
4423 iv_period (struct iv *iv)
4425 tree step = iv->step, period, type;
4426 tree pow2div;
4428 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4430 type = unsigned_type_for (TREE_TYPE (step));
4431 /* Period of the iv is lcm (step, type_range)/step -1,
4432 i.e., N*type_range/step - 1. Since type range is power
4433 of two, N == (step >> num_of_ending_zeros_binary (step),
4434 so the final result is
4436 (type_range >> num_of_ending_zeros_binary (step)) - 1
4439 pow2div = num_ending_zeros (step);
4441 period = build_low_bits_mask (type,
4442 (TYPE_PRECISION (type)
4443 - tree_to_uhwi (pow2div)));
4445 return period;
4448 /* Returns the comparison operator used when eliminating the iv USE. */
4450 static enum tree_code
4451 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4453 struct loop *loop = data->current_loop;
4454 basic_block ex_bb;
4455 edge exit;
4457 ex_bb = gimple_bb (use->stmt);
4458 exit = EDGE_SUCC (ex_bb, 0);
4459 if (flow_bb_inside_loop_p (loop, exit->dest))
4460 exit = EDGE_SUCC (ex_bb, 1);
4462 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4465 static tree
4466 strip_wrap_conserving_type_conversions (tree exp)
4468 while (tree_ssa_useless_type_conversion (exp)
4469 && (nowrap_type_p (TREE_TYPE (exp))
4470 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4471 exp = TREE_OPERAND (exp, 0);
4472 return exp;
4475 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4476 check for an exact match. */
4478 static bool
4479 expr_equal_p (tree e, tree what)
4481 gimple stmt;
4482 enum tree_code code;
4484 e = strip_wrap_conserving_type_conversions (e);
4485 what = strip_wrap_conserving_type_conversions (what);
4487 code = TREE_CODE (what);
4488 if (TREE_TYPE (e) != TREE_TYPE (what))
4489 return false;
4491 if (operand_equal_p (e, what, 0))
4492 return true;
4494 if (TREE_CODE (e) != SSA_NAME)
4495 return false;
4497 stmt = SSA_NAME_DEF_STMT (e);
4498 if (gimple_code (stmt) != GIMPLE_ASSIGN
4499 || gimple_assign_rhs_code (stmt) != code)
4500 return false;
4502 switch (get_gimple_rhs_class (code))
4504 case GIMPLE_BINARY_RHS:
4505 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4506 return false;
4507 /* Fallthru. */
4509 case GIMPLE_UNARY_RHS:
4510 case GIMPLE_SINGLE_RHS:
4511 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4512 default:
4513 return false;
4517 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4518 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4519 calculation is performed in non-wrapping type.
4521 TODO: More generally, we could test for the situation that
4522 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4523 This would require knowing the sign of OFFSET.
4525 Also, we only look for the first addition in the computation of BASE.
4526 More complex analysis would be better, but introducing it just for
4527 this optimization seems like an overkill. */
4529 static bool
4530 difference_cannot_overflow_p (tree base, tree offset)
4532 enum tree_code code;
4533 tree e1, e2;
4535 if (!nowrap_type_p (TREE_TYPE (base)))
4536 return false;
4538 base = expand_simple_operations (base);
4540 if (TREE_CODE (base) == SSA_NAME)
4542 gimple stmt = SSA_NAME_DEF_STMT (base);
4544 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4545 return false;
4547 code = gimple_assign_rhs_code (stmt);
4548 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4549 return false;
4551 e1 = gimple_assign_rhs1 (stmt);
4552 e2 = gimple_assign_rhs2 (stmt);
4554 else
4556 code = TREE_CODE (base);
4557 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4558 return false;
4559 e1 = TREE_OPERAND (base, 0);
4560 e2 = TREE_OPERAND (base, 1);
4563 /* TODO: deeper inspection may be necessary to prove the equality. */
4564 switch (code)
4566 case PLUS_EXPR:
4567 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4568 case POINTER_PLUS_EXPR:
4569 return expr_equal_p (e2, offset);
4571 default:
4572 return false;
4576 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4577 comparison with CAND. NITER describes the number of iterations of
4578 the loops. If successful, the comparison in COMP_P is altered accordingly.
4580 We aim to handle the following situation:
4582 sometype *base, *p;
4583 int a, b, i;
4585 i = a;
4586 p = p_0 = base + a;
4590 bla (*p);
4591 p++;
4592 i++;
4594 while (i < b);
4596 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4597 We aim to optimize this to
4599 p = p_0 = base + a;
4602 bla (*p);
4603 p++;
4605 while (p < p_0 - a + b);
4607 This preserves the correctness, since the pointer arithmetics does not
4608 overflow. More precisely:
4610 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4611 overflow in computing it or the values of p.
4612 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4613 overflow. To prove this, we use the fact that p_0 = base + a. */
4615 static bool
4616 iv_elimination_compare_lt (struct ivopts_data *data,
4617 struct iv_cand *cand, enum tree_code *comp_p,
4618 struct tree_niter_desc *niter)
4620 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4621 struct aff_tree nit, tmpa, tmpb;
4622 enum tree_code comp;
4623 HOST_WIDE_INT step;
4625 /* We need to know that the candidate induction variable does not overflow.
4626 While more complex analysis may be used to prove this, for now just
4627 check that the variable appears in the original program and that it
4628 is computed in a type that guarantees no overflows. */
4629 cand_type = TREE_TYPE (cand->iv->base);
4630 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4631 return false;
4633 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4634 the calculation of the BOUND could overflow, making the comparison
4635 invalid. */
4636 if (!data->loop_single_exit_p)
4637 return false;
4639 /* We need to be able to decide whether candidate is increasing or decreasing
4640 in order to choose the right comparison operator. */
4641 if (!cst_and_fits_in_hwi (cand->iv->step))
4642 return false;
4643 step = int_cst_value (cand->iv->step);
4645 /* Check that the number of iterations matches the expected pattern:
4646 a + 1 > b ? 0 : b - a - 1. */
4647 mbz = niter->may_be_zero;
4648 if (TREE_CODE (mbz) == GT_EXPR)
4650 /* Handle a + 1 > b. */
4651 tree op0 = TREE_OPERAND (mbz, 0);
4652 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4654 a = TREE_OPERAND (op0, 0);
4655 b = TREE_OPERAND (mbz, 1);
4657 else
4658 return false;
4660 else if (TREE_CODE (mbz) == LT_EXPR)
4662 tree op1 = TREE_OPERAND (mbz, 1);
4664 /* Handle b < a + 1. */
4665 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4667 a = TREE_OPERAND (op1, 0);
4668 b = TREE_OPERAND (mbz, 0);
4670 else
4671 return false;
4673 else
4674 return false;
4676 /* Expected number of iterations is B - A - 1. Check that it matches
4677 the actual number, i.e., that B - A - NITER = 1. */
4678 tree_to_aff_combination (niter->niter, nit_type, &nit);
4679 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4680 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4681 aff_combination_scale (&nit, -1);
4682 aff_combination_scale (&tmpa, -1);
4683 aff_combination_add (&tmpb, &tmpa);
4684 aff_combination_add (&tmpb, &nit);
4685 if (tmpb.n != 0 || tmpb.offset != 1)
4686 return false;
4688 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4689 overflow. */
4690 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4691 cand->iv->step,
4692 fold_convert (TREE_TYPE (cand->iv->step), a));
4693 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4694 return false;
4696 /* Determine the new comparison operator. */
4697 comp = step < 0 ? GT_EXPR : LT_EXPR;
4698 if (*comp_p == NE_EXPR)
4699 *comp_p = comp;
4700 else if (*comp_p == EQ_EXPR)
4701 *comp_p = invert_tree_comparison (comp, false);
4702 else
4703 gcc_unreachable ();
4705 return true;
4708 /* Check whether it is possible to express the condition in USE by comparison
4709 of candidate CAND. If so, store the value compared with to BOUND, and the
4710 comparison operator to COMP. */
4712 static bool
4713 may_eliminate_iv (struct ivopts_data *data,
4714 struct iv_use *use, struct iv_cand *cand, tree *bound,
4715 enum tree_code *comp)
4717 basic_block ex_bb;
4718 edge exit;
4719 tree period;
4720 struct loop *loop = data->current_loop;
4721 aff_tree bnd;
4722 struct tree_niter_desc *desc = NULL;
4724 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4725 return false;
4727 /* For now works only for exits that dominate the loop latch.
4728 TODO: extend to other conditions inside loop body. */
4729 ex_bb = gimple_bb (use->stmt);
4730 if (use->stmt != last_stmt (ex_bb)
4731 || gimple_code (use->stmt) != GIMPLE_COND
4732 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4733 return false;
4735 exit = EDGE_SUCC (ex_bb, 0);
4736 if (flow_bb_inside_loop_p (loop, exit->dest))
4737 exit = EDGE_SUCC (ex_bb, 1);
4738 if (flow_bb_inside_loop_p (loop, exit->dest))
4739 return false;
4741 desc = niter_for_exit (data, exit);
4742 if (!desc)
4743 return false;
4745 /* Determine whether we can use the variable to test the exit condition.
4746 This is the case iff the period of the induction variable is greater
4747 than the number of iterations for which the exit condition is true. */
4748 period = iv_period (cand->iv);
4750 /* If the number of iterations is constant, compare against it directly. */
4751 if (TREE_CODE (desc->niter) == INTEGER_CST)
4753 /* See cand_value_at. */
4754 if (stmt_after_increment (loop, cand, use->stmt))
4756 if (!tree_int_cst_lt (desc->niter, period))
4757 return false;
4759 else
4761 if (tree_int_cst_lt (period, desc->niter))
4762 return false;
4766 /* If not, and if this is the only possible exit of the loop, see whether
4767 we can get a conservative estimate on the number of iterations of the
4768 entire loop and compare against that instead. */
4769 else
4771 widest_int period_value, max_niter;
4773 max_niter = desc->max;
4774 if (stmt_after_increment (loop, cand, use->stmt))
4775 max_niter += 1;
4776 period_value = wi::to_widest (period);
4777 if (wi::gtu_p (max_niter, period_value))
4779 /* See if we can take advantage of inferred loop bound information. */
4780 if (data->loop_single_exit_p)
4782 if (!max_loop_iterations (loop, &max_niter))
4783 return false;
4784 /* The loop bound is already adjusted by adding 1. */
4785 if (wi::gtu_p (max_niter, period_value))
4786 return false;
4788 else
4789 return false;
4793 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4795 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4796 aff_combination_to_tree (&bnd));
4797 *comp = iv_elimination_compare (data, use);
4799 /* It is unlikely that computing the number of iterations using division
4800 would be more profitable than keeping the original induction variable. */
4801 if (expression_expensive_p (*bound))
4802 return false;
4804 /* Sometimes, it is possible to handle the situation that the number of
4805 iterations may be zero unless additional assumtions by using <
4806 instead of != in the exit condition.
4808 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4809 base the exit condition on it. However, that is often too
4810 expensive. */
4811 if (!integer_zerop (desc->may_be_zero))
4812 return iv_elimination_compare_lt (data, cand, comp, desc);
4814 return true;
4817 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4818 be copied, if is is used in the loop body and DATA->body_includes_call. */
4820 static int
4821 parm_decl_cost (struct ivopts_data *data, tree bound)
4823 tree sbound = bound;
4824 STRIP_NOPS (sbound);
4826 if (TREE_CODE (sbound) == SSA_NAME
4827 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4828 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4829 && data->body_includes_call)
4830 return COSTS_N_INSNS (1);
4832 return 0;
4835 /* Determines cost of basing replacement of USE on CAND in a condition. */
4837 static bool
4838 determine_use_iv_cost_condition (struct ivopts_data *data,
4839 struct iv_use *use, struct iv_cand *cand)
4841 tree bound = NULL_TREE;
4842 struct iv *cmp_iv;
4843 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4844 comp_cost elim_cost, express_cost, cost, bound_cost;
4845 bool ok;
4846 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4847 tree *control_var, *bound_cst;
4848 enum tree_code comp = ERROR_MARK;
4850 /* Only consider real candidates. */
4851 if (!cand->iv)
4853 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4854 ERROR_MARK, -1);
4855 return false;
4858 /* Try iv elimination. */
4859 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4861 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4862 if (elim_cost.cost == 0)
4863 elim_cost.cost = parm_decl_cost (data, bound);
4864 else if (TREE_CODE (bound) == INTEGER_CST)
4865 elim_cost.cost = 0;
4866 /* If we replace a loop condition 'i < n' with 'p < base + n',
4867 depends_on_elim will have 'base' and 'n' set, which implies
4868 that both 'base' and 'n' will be live during the loop. More likely,
4869 'base + n' will be loop invariant, resulting in only one live value
4870 during the loop. So in that case we clear depends_on_elim and set
4871 elim_inv_expr_id instead. */
4872 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4874 elim_inv_expr_id = get_expr_id (data, bound);
4875 bitmap_clear (depends_on_elim);
4877 /* The bound is a loop invariant, so it will be only computed
4878 once. */
4879 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4881 else
4882 elim_cost = infinite_cost;
4884 /* Try expressing the original giv. If it is compared with an invariant,
4885 note that we cannot get rid of it. */
4886 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4887 NULL, &cmp_iv);
4888 gcc_assert (ok);
4890 /* When the condition is a comparison of the candidate IV against
4891 zero, prefer this IV.
4893 TODO: The constant that we're subtracting from the cost should
4894 be target-dependent. This information should be added to the
4895 target costs for each backend. */
4896 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4897 && integer_zerop (*bound_cst)
4898 && (operand_equal_p (*control_var, cand->var_after, 0)
4899 || operand_equal_p (*control_var, cand->var_before, 0)))
4900 elim_cost.cost -= 1;
4902 express_cost = get_computation_cost (data, use, cand, false,
4903 &depends_on_express, NULL,
4904 &express_inv_expr_id);
4905 fd_ivopts_data = data;
4906 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4908 /* Count the cost of the original bound as well. */
4909 bound_cost = force_var_cost (data, *bound_cst, NULL);
4910 if (bound_cost.cost == 0)
4911 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4912 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4913 bound_cost.cost = 0;
4914 express_cost.cost += bound_cost.cost;
4916 /* Choose the better approach, preferring the eliminated IV. */
4917 if (compare_costs (elim_cost, express_cost) <= 0)
4919 cost = elim_cost;
4920 depends_on = depends_on_elim;
4921 depends_on_elim = NULL;
4922 inv_expr_id = elim_inv_expr_id;
4924 else
4926 cost = express_cost;
4927 depends_on = depends_on_express;
4928 depends_on_express = NULL;
4929 bound = NULL_TREE;
4930 comp = ERROR_MARK;
4931 inv_expr_id = express_inv_expr_id;
4934 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4936 if (depends_on_elim)
4937 BITMAP_FREE (depends_on_elim);
4938 if (depends_on_express)
4939 BITMAP_FREE (depends_on_express);
4941 return !infinite_cost_p (cost);
4944 /* Determines cost of basing replacement of USE on CAND. Returns false
4945 if USE cannot be based on CAND. */
4947 static bool
4948 determine_use_iv_cost (struct ivopts_data *data,
4949 struct iv_use *use, struct iv_cand *cand)
4951 switch (use->type)
4953 case USE_NONLINEAR_EXPR:
4954 return determine_use_iv_cost_generic (data, use, cand);
4956 case USE_ADDRESS:
4957 return determine_use_iv_cost_address (data, use, cand);
4959 case USE_COMPARE:
4960 return determine_use_iv_cost_condition (data, use, cand);
4962 default:
4963 gcc_unreachable ();
4967 /* Return true if get_computation_cost indicates that autoincrement is
4968 a possibility for the pair of USE and CAND, false otherwise. */
4970 static bool
4971 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4972 struct iv_cand *cand)
4974 bitmap depends_on;
4975 bool can_autoinc;
4976 comp_cost cost;
4978 if (use->type != USE_ADDRESS)
4979 return false;
4981 cost = get_computation_cost (data, use, cand, true, &depends_on,
4982 &can_autoinc, NULL);
4984 BITMAP_FREE (depends_on);
4986 return !infinite_cost_p (cost) && can_autoinc;
4989 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4990 use that allows autoincrement, and set their AINC_USE if possible. */
4992 static void
4993 set_autoinc_for_original_candidates (struct ivopts_data *data)
4995 unsigned i, j;
4997 for (i = 0; i < n_iv_cands (data); i++)
4999 struct iv_cand *cand = iv_cand (data, i);
5000 struct iv_use *closest_before = NULL;
5001 struct iv_use *closest_after = NULL;
5002 if (cand->pos != IP_ORIGINAL)
5003 continue;
5005 for (j = 0; j < n_iv_uses (data); j++)
5007 struct iv_use *use = iv_use (data, j);
5008 unsigned uid = gimple_uid (use->stmt);
5010 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5011 continue;
5013 if (uid < gimple_uid (cand->incremented_at)
5014 && (closest_before == NULL
5015 || uid > gimple_uid (closest_before->stmt)))
5016 closest_before = use;
5018 if (uid > gimple_uid (cand->incremented_at)
5019 && (closest_after == NULL
5020 || uid < gimple_uid (closest_after->stmt)))
5021 closest_after = use;
5024 if (closest_before != NULL
5025 && autoinc_possible_for_pair (data, closest_before, cand))
5026 cand->ainc_use = closest_before;
5027 else if (closest_after != NULL
5028 && autoinc_possible_for_pair (data, closest_after, cand))
5029 cand->ainc_use = closest_after;
5033 /* Finds the candidates for the induction variables. */
5035 static void
5036 find_iv_candidates (struct ivopts_data *data)
5038 /* Add commonly used ivs. */
5039 add_standard_iv_candidates (data);
5041 /* Add old induction variables. */
5042 add_old_ivs_candidates (data);
5044 /* Add induction variables derived from uses. */
5045 add_derived_ivs_candidates (data);
5047 set_autoinc_for_original_candidates (data);
5049 /* Record the important candidates. */
5050 record_important_candidates (data);
5053 /* Determines costs of basing the use of the iv on an iv candidate. */
5055 static void
5056 determine_use_iv_costs (struct ivopts_data *data)
5058 unsigned i, j;
5059 struct iv_use *use;
5060 struct iv_cand *cand;
5061 bitmap to_clear = BITMAP_ALLOC (NULL);
5063 alloc_use_cost_map (data);
5065 for (i = 0; i < n_iv_uses (data); i++)
5067 use = iv_use (data, i);
5069 if (data->consider_all_candidates)
5071 for (j = 0; j < n_iv_cands (data); j++)
5073 cand = iv_cand (data, j);
5074 determine_use_iv_cost (data, use, cand);
5077 else
5079 bitmap_iterator bi;
5081 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5083 cand = iv_cand (data, j);
5084 if (!determine_use_iv_cost (data, use, cand))
5085 bitmap_set_bit (to_clear, j);
5088 /* Remove the candidates for that the cost is infinite from
5089 the list of related candidates. */
5090 bitmap_and_compl_into (use->related_cands, to_clear);
5091 bitmap_clear (to_clear);
5095 BITMAP_FREE (to_clear);
5097 if (dump_file && (dump_flags & TDF_DETAILS))
5099 fprintf (dump_file, "Use-candidate costs:\n");
5101 for (i = 0; i < n_iv_uses (data); i++)
5103 use = iv_use (data, i);
5105 fprintf (dump_file, "Use %d:\n", i);
5106 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5107 for (j = 0; j < use->n_map_members; j++)
5109 if (!use->cost_map[j].cand
5110 || infinite_cost_p (use->cost_map[j].cost))
5111 continue;
5113 fprintf (dump_file, " %d\t%d\t%d\t",
5114 use->cost_map[j].cand->id,
5115 use->cost_map[j].cost.cost,
5116 use->cost_map[j].cost.complexity);
5117 if (use->cost_map[j].depends_on)
5118 bitmap_print (dump_file,
5119 use->cost_map[j].depends_on, "","");
5120 if (use->cost_map[j].inv_expr_id != -1)
5121 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5122 fprintf (dump_file, "\n");
5125 fprintf (dump_file, "\n");
5127 fprintf (dump_file, "\n");
5131 /* Determines cost of the candidate CAND. */
5133 static void
5134 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5136 comp_cost cost_base;
5137 unsigned cost, cost_step;
5138 tree base;
5140 if (!cand->iv)
5142 cand->cost = 0;
5143 return;
5146 /* There are two costs associated with the candidate -- its increment
5147 and its initialization. The second is almost negligible for any loop
5148 that rolls enough, so we take it just very little into account. */
5150 base = cand->iv->base;
5151 cost_base = force_var_cost (data, base, NULL);
5152 /* It will be exceptional that the iv register happens to be initialized with
5153 the proper value at no cost. In general, there will at least be a regcopy
5154 or a const set. */
5155 if (cost_base.cost == 0)
5156 cost_base.cost = COSTS_N_INSNS (1);
5157 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5159 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5161 /* Prefer the original ivs unless we may gain something by replacing it.
5162 The reason is to make debugging simpler; so this is not relevant for
5163 artificial ivs created by other optimization passes. */
5164 if (cand->pos != IP_ORIGINAL
5165 || !SSA_NAME_VAR (cand->var_before)
5166 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5167 cost++;
5169 /* Prefer not to insert statements into latch unless there are some
5170 already (so that we do not create unnecessary jumps). */
5171 if (cand->pos == IP_END
5172 && empty_block_p (ip_end_pos (data->current_loop)))
5173 cost++;
5175 cand->cost = cost;
5176 cand->cost_step = cost_step;
5179 /* Determines costs of computation of the candidates. */
5181 static void
5182 determine_iv_costs (struct ivopts_data *data)
5184 unsigned i;
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5188 fprintf (dump_file, "Candidate costs:\n");
5189 fprintf (dump_file, " cand\tcost\n");
5192 for (i = 0; i < n_iv_cands (data); i++)
5194 struct iv_cand *cand = iv_cand (data, i);
5196 determine_iv_cost (data, cand);
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5202 if (dump_file && (dump_flags & TDF_DETAILS))
5203 fprintf (dump_file, "\n");
5206 /* Calculates cost for having SIZE induction variables. */
5208 static unsigned
5209 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5211 /* We add size to the cost, so that we prefer eliminating ivs
5212 if possible. */
5213 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5214 data->body_includes_call);
5217 /* For each size of the induction variable set determine the penalty. */
5219 static void
5220 determine_set_costs (struct ivopts_data *data)
5222 unsigned j, n;
5223 gimple phi;
5224 gimple_stmt_iterator psi;
5225 tree op;
5226 struct loop *loop = data->current_loop;
5227 bitmap_iterator bi;
5229 if (dump_file && (dump_flags & TDF_DETAILS))
5231 fprintf (dump_file, "Global costs:\n");
5232 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5233 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5234 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5235 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5238 n = 0;
5239 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5241 phi = gsi_stmt (psi);
5242 op = PHI_RESULT (phi);
5244 if (virtual_operand_p (op))
5245 continue;
5247 if (get_iv (data, op))
5248 continue;
5250 n++;
5253 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5255 struct version_info *info = ver_info (data, j);
5257 if (info->inv_id && info->has_nonlin_use)
5258 n++;
5261 data->regs_used = n;
5262 if (dump_file && (dump_flags & TDF_DETAILS))
5263 fprintf (dump_file, " regs_used %d\n", n);
5265 if (dump_file && (dump_flags & TDF_DETAILS))
5267 fprintf (dump_file, " cost for size:\n");
5268 fprintf (dump_file, " ivs\tcost\n");
5269 for (j = 0; j <= 2 * target_avail_regs; j++)
5270 fprintf (dump_file, " %d\t%d\n", j,
5271 ivopts_global_cost_for_size (data, j));
5272 fprintf (dump_file, "\n");
5276 /* Returns true if A is a cheaper cost pair than B. */
5278 static bool
5279 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5281 int cmp;
5283 if (!a)
5284 return false;
5286 if (!b)
5287 return true;
5289 cmp = compare_costs (a->cost, b->cost);
5290 if (cmp < 0)
5291 return true;
5293 if (cmp > 0)
5294 return false;
5296 /* In case the costs are the same, prefer the cheaper candidate. */
5297 if (a->cand->cost < b->cand->cost)
5298 return true;
5300 return false;
5304 /* Returns candidate by that USE is expressed in IVS. */
5306 static struct cost_pair *
5307 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5309 return ivs->cand_for_use[use->id];
5312 /* Computes the cost field of IVS structure. */
5314 static void
5315 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5317 comp_cost cost = ivs->cand_use_cost;
5319 cost.cost += ivs->cand_cost;
5321 cost.cost += ivopts_global_cost_for_size (data,
5322 ivs->n_regs + ivs->num_used_inv_expr);
5324 ivs->cost = cost;
5327 /* Remove invariants in set INVS to set IVS. */
5329 static void
5330 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5332 bitmap_iterator bi;
5333 unsigned iid;
5335 if (!invs)
5336 return;
5338 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5340 ivs->n_invariant_uses[iid]--;
5341 if (ivs->n_invariant_uses[iid] == 0)
5342 ivs->n_regs--;
5346 /* Set USE not to be expressed by any candidate in IVS. */
5348 static void
5349 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5350 struct iv_use *use)
5352 unsigned uid = use->id, cid;
5353 struct cost_pair *cp;
5355 cp = ivs->cand_for_use[uid];
5356 if (!cp)
5357 return;
5358 cid = cp->cand->id;
5360 ivs->bad_uses++;
5361 ivs->cand_for_use[uid] = NULL;
5362 ivs->n_cand_uses[cid]--;
5364 if (ivs->n_cand_uses[cid] == 0)
5366 bitmap_clear_bit (ivs->cands, cid);
5367 /* Do not count the pseudocandidates. */
5368 if (cp->cand->iv)
5369 ivs->n_regs--;
5370 ivs->n_cands--;
5371 ivs->cand_cost -= cp->cand->cost;
5373 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5376 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5378 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5380 if (cp->inv_expr_id != -1)
5382 ivs->used_inv_expr[cp->inv_expr_id]--;
5383 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5384 ivs->num_used_inv_expr--;
5386 iv_ca_recount_cost (data, ivs);
5389 /* Add invariants in set INVS to set IVS. */
5391 static void
5392 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5394 bitmap_iterator bi;
5395 unsigned iid;
5397 if (!invs)
5398 return;
5400 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5402 ivs->n_invariant_uses[iid]++;
5403 if (ivs->n_invariant_uses[iid] == 1)
5404 ivs->n_regs++;
5408 /* Set cost pair for USE in set IVS to CP. */
5410 static void
5411 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5412 struct iv_use *use, struct cost_pair *cp)
5414 unsigned uid = use->id, cid;
5416 if (ivs->cand_for_use[uid] == cp)
5417 return;
5419 if (ivs->cand_for_use[uid])
5420 iv_ca_set_no_cp (data, ivs, use);
5422 if (cp)
5424 cid = cp->cand->id;
5426 ivs->bad_uses--;
5427 ivs->cand_for_use[uid] = cp;
5428 ivs->n_cand_uses[cid]++;
5429 if (ivs->n_cand_uses[cid] == 1)
5431 bitmap_set_bit (ivs->cands, cid);
5432 /* Do not count the pseudocandidates. */
5433 if (cp->cand->iv)
5434 ivs->n_regs++;
5435 ivs->n_cands++;
5436 ivs->cand_cost += cp->cand->cost;
5438 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5441 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5442 iv_ca_set_add_invariants (ivs, cp->depends_on);
5444 if (cp->inv_expr_id != -1)
5446 ivs->used_inv_expr[cp->inv_expr_id]++;
5447 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5448 ivs->num_used_inv_expr++;
5450 iv_ca_recount_cost (data, ivs);
5454 /* Extend set IVS by expressing USE by some of the candidates in it
5455 if possible. All important candidates will be considered
5456 if IMPORTANT_CANDIDATES is true. */
5458 static void
5459 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5460 struct iv_use *use, bool important_candidates)
5462 struct cost_pair *best_cp = NULL, *cp;
5463 bitmap_iterator bi;
5464 bitmap cands;
5465 unsigned i;
5467 gcc_assert (ivs->upto >= use->id);
5469 if (ivs->upto == use->id)
5471 ivs->upto++;
5472 ivs->bad_uses++;
5475 cands = (important_candidates ? data->important_candidates : ivs->cands);
5476 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5478 struct iv_cand *cand = iv_cand (data, i);
5480 cp = get_use_iv_cost (data, use, cand);
5482 if (cheaper_cost_pair (cp, best_cp))
5483 best_cp = cp;
5486 iv_ca_set_cp (data, ivs, use, best_cp);
5489 /* Get cost for assignment IVS. */
5491 static comp_cost
5492 iv_ca_cost (struct iv_ca *ivs)
5494 /* This was a conditional expression but it triggered a bug in
5495 Sun C 5.5. */
5496 if (ivs->bad_uses)
5497 return infinite_cost;
5498 else
5499 return ivs->cost;
5502 /* Returns true if all dependences of CP are among invariants in IVS. */
5504 static bool
5505 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5507 unsigned i;
5508 bitmap_iterator bi;
5510 if (!cp->depends_on)
5511 return true;
5513 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5515 if (ivs->n_invariant_uses[i] == 0)
5516 return false;
5519 return true;
5522 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5523 it before NEXT_CHANGE. */
5525 static struct iv_ca_delta *
5526 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5527 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5529 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5531 change->use = use;
5532 change->old_cp = old_cp;
5533 change->new_cp = new_cp;
5534 change->next_change = next_change;
5536 return change;
5539 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5540 are rewritten. */
5542 static struct iv_ca_delta *
5543 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5545 struct iv_ca_delta *last;
5547 if (!l2)
5548 return l1;
5550 if (!l1)
5551 return l2;
5553 for (last = l1; last->next_change; last = last->next_change)
5554 continue;
5555 last->next_change = l2;
5557 return l1;
5560 /* Reverse the list of changes DELTA, forming the inverse to it. */
5562 static struct iv_ca_delta *
5563 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5565 struct iv_ca_delta *act, *next, *prev = NULL;
5566 struct cost_pair *tmp;
5568 for (act = delta; act; act = next)
5570 next = act->next_change;
5571 act->next_change = prev;
5572 prev = act;
5574 tmp = act->old_cp;
5575 act->old_cp = act->new_cp;
5576 act->new_cp = tmp;
5579 return prev;
5582 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5583 reverted instead. */
5585 static void
5586 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5587 struct iv_ca_delta *delta, bool forward)
5589 struct cost_pair *from, *to;
5590 struct iv_ca_delta *act;
5592 if (!forward)
5593 delta = iv_ca_delta_reverse (delta);
5595 for (act = delta; act; act = act->next_change)
5597 from = act->old_cp;
5598 to = act->new_cp;
5599 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5600 iv_ca_set_cp (data, ivs, act->use, to);
5603 if (!forward)
5604 iv_ca_delta_reverse (delta);
5607 /* Returns true if CAND is used in IVS. */
5609 static bool
5610 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5612 return ivs->n_cand_uses[cand->id] > 0;
5615 /* Returns number of induction variable candidates in the set IVS. */
5617 static unsigned
5618 iv_ca_n_cands (struct iv_ca *ivs)
5620 return ivs->n_cands;
5623 /* Free the list of changes DELTA. */
5625 static void
5626 iv_ca_delta_free (struct iv_ca_delta **delta)
5628 struct iv_ca_delta *act, *next;
5630 for (act = *delta; act; act = next)
5632 next = act->next_change;
5633 free (act);
5636 *delta = NULL;
5639 /* Allocates new iv candidates assignment. */
5641 static struct iv_ca *
5642 iv_ca_new (struct ivopts_data *data)
5644 struct iv_ca *nw = XNEW (struct iv_ca);
5646 nw->upto = 0;
5647 nw->bad_uses = 0;
5648 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5649 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5650 nw->cands = BITMAP_ALLOC (NULL);
5651 nw->n_cands = 0;
5652 nw->n_regs = 0;
5653 nw->cand_use_cost = no_cost;
5654 nw->cand_cost = 0;
5655 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5656 nw->cost = no_cost;
5657 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5658 nw->num_used_inv_expr = 0;
5660 return nw;
5663 /* Free memory occupied by the set IVS. */
5665 static void
5666 iv_ca_free (struct iv_ca **ivs)
5668 free ((*ivs)->cand_for_use);
5669 free ((*ivs)->n_cand_uses);
5670 BITMAP_FREE ((*ivs)->cands);
5671 free ((*ivs)->n_invariant_uses);
5672 free ((*ivs)->used_inv_expr);
5673 free (*ivs);
5674 *ivs = NULL;
5677 /* Dumps IVS to FILE. */
5679 static void
5680 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5682 const char *pref = " invariants ";
5683 unsigned i;
5684 comp_cost cost = iv_ca_cost (ivs);
5686 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5687 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5688 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5689 bitmap_print (file, ivs->cands, " candidates: ","\n");
5691 for (i = 0; i < ivs->upto; i++)
5693 struct iv_use *use = iv_use (data, i);
5694 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5695 if (cp)
5696 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5697 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5698 else
5699 fprintf (file, " use:%d --> ??\n", use->id);
5702 for (i = 1; i <= data->max_inv_id; i++)
5703 if (ivs->n_invariant_uses[i])
5705 fprintf (file, "%s%d", pref, i);
5706 pref = ", ";
5708 fprintf (file, "\n\n");
5711 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5712 new set, and store differences in DELTA. Number of induction variables
5713 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5714 the function will try to find a solution with mimimal iv candidates. */
5716 static comp_cost
5717 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_cand *cand, struct iv_ca_delta **delta,
5719 unsigned *n_ivs, bool min_ncand)
5721 unsigned i;
5722 comp_cost cost;
5723 struct iv_use *use;
5724 struct cost_pair *old_cp, *new_cp;
5726 *delta = NULL;
5727 for (i = 0; i < ivs->upto; i++)
5729 use = iv_use (data, i);
5730 old_cp = iv_ca_cand_for_use (ivs, use);
5732 if (old_cp
5733 && old_cp->cand == cand)
5734 continue;
5736 new_cp = get_use_iv_cost (data, use, cand);
5737 if (!new_cp)
5738 continue;
5740 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5741 continue;
5743 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5744 continue;
5746 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5749 iv_ca_delta_commit (data, ivs, *delta, true);
5750 cost = iv_ca_cost (ivs);
5751 if (n_ivs)
5752 *n_ivs = iv_ca_n_cands (ivs);
5753 iv_ca_delta_commit (data, ivs, *delta, false);
5755 return cost;
5758 /* Try narrowing set IVS by removing CAND. Return the cost of
5759 the new set and store the differences in DELTA. START is
5760 the candidate with which we start narrowing. */
5762 static comp_cost
5763 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5764 struct iv_cand *cand, struct iv_cand *start,
5765 struct iv_ca_delta **delta)
5767 unsigned i, ci;
5768 struct iv_use *use;
5769 struct cost_pair *old_cp, *new_cp, *cp;
5770 bitmap_iterator bi;
5771 struct iv_cand *cnd;
5772 comp_cost cost, best_cost, acost;
5774 *delta = NULL;
5775 for (i = 0; i < n_iv_uses (data); i++)
5777 use = iv_use (data, i);
5779 old_cp = iv_ca_cand_for_use (ivs, use);
5780 if (old_cp->cand != cand)
5781 continue;
5783 best_cost = iv_ca_cost (ivs);
5784 /* Start narrowing with START. */
5785 new_cp = get_use_iv_cost (data, use, start);
5787 if (data->consider_all_candidates)
5789 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5791 if (ci == cand->id || (start && ci == start->id))
5792 continue;
5794 cnd = iv_cand (data, ci);
5796 cp = get_use_iv_cost (data, use, cnd);
5797 if (!cp)
5798 continue;
5800 iv_ca_set_cp (data, ivs, use, cp);
5801 acost = iv_ca_cost (ivs);
5803 if (compare_costs (acost, best_cost) < 0)
5805 best_cost = acost;
5806 new_cp = cp;
5810 else
5812 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5814 if (ci == cand->id || (start && ci == start->id))
5815 continue;
5817 cnd = iv_cand (data, ci);
5819 cp = get_use_iv_cost (data, use, cnd);
5820 if (!cp)
5821 continue;
5823 iv_ca_set_cp (data, ivs, use, cp);
5824 acost = iv_ca_cost (ivs);
5826 if (compare_costs (acost, best_cost) < 0)
5828 best_cost = acost;
5829 new_cp = cp;
5833 /* Restore to old cp for use. */
5834 iv_ca_set_cp (data, ivs, use, old_cp);
5836 if (!new_cp)
5838 iv_ca_delta_free (delta);
5839 return infinite_cost;
5842 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5845 iv_ca_delta_commit (data, ivs, *delta, true);
5846 cost = iv_ca_cost (ivs);
5847 iv_ca_delta_commit (data, ivs, *delta, false);
5849 return cost;
5852 /* Try optimizing the set of candidates IVS by removing candidates different
5853 from to EXCEPT_CAND from it. Return cost of the new set, and store
5854 differences in DELTA. */
5856 static comp_cost
5857 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5860 bitmap_iterator bi;
5861 struct iv_ca_delta *act_delta, *best_delta;
5862 unsigned i;
5863 comp_cost best_cost, acost;
5864 struct iv_cand *cand;
5866 best_delta = NULL;
5867 best_cost = iv_ca_cost (ivs);
5869 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5871 cand = iv_cand (data, i);
5873 if (cand == except_cand)
5874 continue;
5876 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5878 if (compare_costs (acost, best_cost) < 0)
5880 best_cost = acost;
5881 iv_ca_delta_free (&best_delta);
5882 best_delta = act_delta;
5884 else
5885 iv_ca_delta_free (&act_delta);
5888 if (!best_delta)
5890 *delta = NULL;
5891 return best_cost;
5894 /* Recurse to possibly remove other unnecessary ivs. */
5895 iv_ca_delta_commit (data, ivs, best_delta, true);
5896 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5897 iv_ca_delta_commit (data, ivs, best_delta, false);
5898 *delta = iv_ca_delta_join (best_delta, *delta);
5899 return best_cost;
5902 /* Tries to extend the sets IVS in the best possible way in order
5903 to express the USE. If ORIGINALP is true, prefer candidates from
5904 the original set of IVs, otherwise favor important candidates not
5905 based on any memory object. */
5907 static bool
5908 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5909 struct iv_use *use, bool originalp)
5911 comp_cost best_cost, act_cost;
5912 unsigned i;
5913 bitmap_iterator bi;
5914 struct iv_cand *cand;
5915 struct iv_ca_delta *best_delta = NULL, *act_delta;
5916 struct cost_pair *cp;
5918 iv_ca_add_use (data, ivs, use, false);
5919 best_cost = iv_ca_cost (ivs);
5921 cp = iv_ca_cand_for_use (ivs, use);
5922 if (!cp)
5924 ivs->upto--;
5925 ivs->bad_uses--;
5926 iv_ca_add_use (data, ivs, use, true);
5927 best_cost = iv_ca_cost (ivs);
5928 cp = iv_ca_cand_for_use (ivs, use);
5930 if (cp)
5932 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5933 iv_ca_set_no_cp (data, ivs, use);
5936 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5937 first try important candidates not based on any memory object. Only if
5938 this fails, try the specific ones. Rationale -- in loops with many
5939 variables the best choice often is to use just one generic biv. If we
5940 added here many ivs specific to the uses, the optimization algorithm later
5941 would be likely to get stuck in a local minimum, thus causing us to create
5942 too many ivs. The approach from few ivs to more seems more likely to be
5943 successful -- starting from few ivs, replacing an expensive use by a
5944 specific iv should always be a win. */
5945 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5947 cand = iv_cand (data, i);
5949 if (originalp && cand->pos !=IP_ORIGINAL)
5950 continue;
5952 if (!originalp && cand->iv->base_object != NULL_TREE)
5953 continue;
5955 if (iv_ca_cand_used_p (ivs, cand))
5956 continue;
5958 cp = get_use_iv_cost (data, use, cand);
5959 if (!cp)
5960 continue;
5962 iv_ca_set_cp (data, ivs, use, cp);
5963 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5964 true);
5965 iv_ca_set_no_cp (data, ivs, use);
5966 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5968 if (compare_costs (act_cost, best_cost) < 0)
5970 best_cost = act_cost;
5972 iv_ca_delta_free (&best_delta);
5973 best_delta = act_delta;
5975 else
5976 iv_ca_delta_free (&act_delta);
5979 if (infinite_cost_p (best_cost))
5981 for (i = 0; i < use->n_map_members; i++)
5983 cp = use->cost_map + i;
5984 cand = cp->cand;
5985 if (!cand)
5986 continue;
5988 /* Already tried this. */
5989 if (cand->important)
5991 if (originalp && cand->pos == IP_ORIGINAL)
5992 continue;
5993 if (!originalp && cand->iv->base_object == NULL_TREE)
5994 continue;
5997 if (iv_ca_cand_used_p (ivs, cand))
5998 continue;
6000 act_delta = NULL;
6001 iv_ca_set_cp (data, ivs, use, cp);
6002 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6003 iv_ca_set_no_cp (data, ivs, use);
6004 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6005 cp, act_delta);
6007 if (compare_costs (act_cost, best_cost) < 0)
6009 best_cost = act_cost;
6011 if (best_delta)
6012 iv_ca_delta_free (&best_delta);
6013 best_delta = act_delta;
6015 else
6016 iv_ca_delta_free (&act_delta);
6020 iv_ca_delta_commit (data, ivs, best_delta, true);
6021 iv_ca_delta_free (&best_delta);
6023 return !infinite_cost_p (best_cost);
6026 /* Finds an initial assignment of candidates to uses. */
6028 static struct iv_ca *
6029 get_initial_solution (struct ivopts_data *data, bool originalp)
6031 struct iv_ca *ivs = iv_ca_new (data);
6032 unsigned i;
6034 for (i = 0; i < n_iv_uses (data); i++)
6035 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6037 iv_ca_free (&ivs);
6038 return NULL;
6041 return ivs;
6044 /* Tries to improve set of induction variables IVS. */
6046 static bool
6047 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6049 unsigned i, n_ivs;
6050 comp_cost acost, best_cost = iv_ca_cost (ivs);
6051 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6052 struct iv_cand *cand;
6054 /* Try extending the set of induction variables by one. */
6055 for (i = 0; i < n_iv_cands (data); i++)
6057 cand = iv_cand (data, i);
6059 if (iv_ca_cand_used_p (ivs, cand))
6060 continue;
6062 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6063 if (!act_delta)
6064 continue;
6066 /* If we successfully added the candidate and the set is small enough,
6067 try optimizing it by removing other candidates. */
6068 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6070 iv_ca_delta_commit (data, ivs, act_delta, true);
6071 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6072 iv_ca_delta_commit (data, ivs, act_delta, false);
6073 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6076 if (compare_costs (acost, best_cost) < 0)
6078 best_cost = acost;
6079 iv_ca_delta_free (&best_delta);
6080 best_delta = act_delta;
6082 else
6083 iv_ca_delta_free (&act_delta);
6086 if (!best_delta)
6088 /* Try removing the candidates from the set instead. */
6089 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6091 /* Nothing more we can do. */
6092 if (!best_delta)
6093 return false;
6096 iv_ca_delta_commit (data, ivs, best_delta, true);
6097 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6098 iv_ca_delta_free (&best_delta);
6099 return true;
6102 /* Attempts to find the optimal set of induction variables. We do simple
6103 greedy heuristic -- we try to replace at most one candidate in the selected
6104 solution and remove the unused ivs while this improves the cost. */
6106 static struct iv_ca *
6107 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6109 struct iv_ca *set;
6111 /* Get the initial solution. */
6112 set = get_initial_solution (data, originalp);
6113 if (!set)
6115 if (dump_file && (dump_flags & TDF_DETAILS))
6116 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6117 return NULL;
6120 if (dump_file && (dump_flags & TDF_DETAILS))
6122 fprintf (dump_file, "Initial set of candidates:\n");
6123 iv_ca_dump (data, dump_file, set);
6126 while (try_improve_iv_set (data, set))
6128 if (dump_file && (dump_flags & TDF_DETAILS))
6130 fprintf (dump_file, "Improved to:\n");
6131 iv_ca_dump (data, dump_file, set);
6135 return set;
6138 static struct iv_ca *
6139 find_optimal_iv_set (struct ivopts_data *data)
6141 unsigned i;
6142 struct iv_ca *set, *origset;
6143 struct iv_use *use;
6144 comp_cost cost, origcost;
6146 /* Determine the cost based on a strategy that starts with original IVs,
6147 and try again using a strategy that prefers candidates not based
6148 on any IVs. */
6149 origset = find_optimal_iv_set_1 (data, true);
6150 set = find_optimal_iv_set_1 (data, false);
6152 if (!origset && !set)
6153 return NULL;
6155 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6156 cost = set ? iv_ca_cost (set) : infinite_cost;
6158 if (dump_file && (dump_flags & TDF_DETAILS))
6160 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6161 origcost.cost, origcost.complexity);
6162 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6163 cost.cost, cost.complexity);
6166 /* Choose the one with the best cost. */
6167 if (compare_costs (origcost, cost) <= 0)
6169 if (set)
6170 iv_ca_free (&set);
6171 set = origset;
6173 else if (origset)
6174 iv_ca_free (&origset);
6176 for (i = 0; i < n_iv_uses (data); i++)
6178 use = iv_use (data, i);
6179 use->selected = iv_ca_cand_for_use (set, use)->cand;
6182 return set;
6185 /* Creates a new induction variable corresponding to CAND. */
6187 static void
6188 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6190 gimple_stmt_iterator incr_pos;
6191 tree base;
6192 bool after = false;
6194 if (!cand->iv)
6195 return;
6197 switch (cand->pos)
6199 case IP_NORMAL:
6200 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6201 break;
6203 case IP_END:
6204 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6205 after = true;
6206 break;
6208 case IP_AFTER_USE:
6209 after = true;
6210 /* fall through */
6211 case IP_BEFORE_USE:
6212 incr_pos = gsi_for_stmt (cand->incremented_at);
6213 break;
6215 case IP_ORIGINAL:
6216 /* Mark that the iv is preserved. */
6217 name_info (data, cand->var_before)->preserve_biv = true;
6218 name_info (data, cand->var_after)->preserve_biv = true;
6220 /* Rewrite the increment so that it uses var_before directly. */
6221 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6222 return;
6225 gimple_add_tmp_var (cand->var_before);
6227 base = unshare_expr (cand->iv->base);
6229 create_iv (base, unshare_expr (cand->iv->step),
6230 cand->var_before, data->current_loop,
6231 &incr_pos, after, &cand->var_before, &cand->var_after);
6234 /* Creates new induction variables described in SET. */
6236 static void
6237 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6239 unsigned i;
6240 struct iv_cand *cand;
6241 bitmap_iterator bi;
6243 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6245 cand = iv_cand (data, i);
6246 create_new_iv (data, cand);
6249 if (dump_file && (dump_flags & TDF_DETAILS))
6251 fprintf (dump_file, "\nSelected IV set: \n");
6252 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6254 cand = iv_cand (data, i);
6255 dump_cand (dump_file, cand);
6257 fprintf (dump_file, "\n");
6261 /* Rewrites USE (definition of iv used in a nonlinear expression)
6262 using candidate CAND. */
6264 static void
6265 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6266 struct iv_use *use, struct iv_cand *cand)
6268 tree comp;
6269 tree op, tgt;
6270 gimple ass;
6271 gimple_stmt_iterator bsi;
6273 /* An important special case -- if we are asked to express value of
6274 the original iv by itself, just exit; there is no need to
6275 introduce a new computation (that might also need casting the
6276 variable to unsigned and back). */
6277 if (cand->pos == IP_ORIGINAL
6278 && cand->incremented_at == use->stmt)
6280 enum tree_code stmt_code;
6282 gcc_assert (is_gimple_assign (use->stmt));
6283 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6285 /* Check whether we may leave the computation unchanged.
6286 This is the case only if it does not rely on other
6287 computations in the loop -- otherwise, the computation
6288 we rely upon may be removed in remove_unused_ivs,
6289 thus leading to ICE. */
6290 stmt_code = gimple_assign_rhs_code (use->stmt);
6291 if (stmt_code == PLUS_EXPR
6292 || stmt_code == MINUS_EXPR
6293 || stmt_code == POINTER_PLUS_EXPR)
6295 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6296 op = gimple_assign_rhs2 (use->stmt);
6297 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6298 op = gimple_assign_rhs1 (use->stmt);
6299 else
6300 op = NULL_TREE;
6302 else
6303 op = NULL_TREE;
6305 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6306 return;
6309 comp = get_computation (data->current_loop, use, cand);
6310 gcc_assert (comp != NULL_TREE);
6312 switch (gimple_code (use->stmt))
6314 case GIMPLE_PHI:
6315 tgt = PHI_RESULT (use->stmt);
6317 /* If we should keep the biv, do not replace it. */
6318 if (name_info (data, tgt)->preserve_biv)
6319 return;
6321 bsi = gsi_after_labels (gimple_bb (use->stmt));
6322 break;
6324 case GIMPLE_ASSIGN:
6325 tgt = gimple_assign_lhs (use->stmt);
6326 bsi = gsi_for_stmt (use->stmt);
6327 break;
6329 default:
6330 gcc_unreachable ();
6333 if (!valid_gimple_rhs_p (comp)
6334 || (gimple_code (use->stmt) != GIMPLE_PHI
6335 /* We can't allow re-allocating the stmt as it might be pointed
6336 to still. */
6337 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6338 >= gimple_num_ops (gsi_stmt (bsi)))))
6340 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6341 true, GSI_SAME_STMT);
6342 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6344 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6345 /* As this isn't a plain copy we have to reset alignment
6346 information. */
6347 if (SSA_NAME_PTR_INFO (comp))
6348 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6352 if (gimple_code (use->stmt) == GIMPLE_PHI)
6354 ass = gimple_build_assign (tgt, comp);
6355 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6357 bsi = gsi_for_stmt (use->stmt);
6358 remove_phi_node (&bsi, false);
6360 else
6362 gimple_assign_set_rhs_from_tree (&bsi, comp);
6363 use->stmt = gsi_stmt (bsi);
6367 /* Performs a peephole optimization to reorder the iv update statement with
6368 a mem ref to enable instruction combining in later phases. The mem ref uses
6369 the iv value before the update, so the reordering transformation requires
6370 adjustment of the offset. CAND is the selected IV_CAND.
6372 Example:
6374 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6375 iv2 = iv1 + 1;
6377 if (t < val) (1)
6378 goto L;
6379 goto Head;
6382 directly propagating t over to (1) will introduce overlapping live range
6383 thus increase register pressure. This peephole transform it into:
6386 iv2 = iv1 + 1;
6387 t = MEM_REF (base, iv2, 8, 8);
6388 if (t < val)
6389 goto L;
6390 goto Head;
6393 static void
6394 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6396 tree var_after;
6397 gimple iv_update, stmt;
6398 basic_block bb;
6399 gimple_stmt_iterator gsi, gsi_iv;
6401 if (cand->pos != IP_NORMAL)
6402 return;
6404 var_after = cand->var_after;
6405 iv_update = SSA_NAME_DEF_STMT (var_after);
6407 bb = gimple_bb (iv_update);
6408 gsi = gsi_last_nondebug_bb (bb);
6409 stmt = gsi_stmt (gsi);
6411 /* Only handle conditional statement for now. */
6412 if (gimple_code (stmt) != GIMPLE_COND)
6413 return;
6415 gsi_prev_nondebug (&gsi);
6416 stmt = gsi_stmt (gsi);
6417 if (stmt != iv_update)
6418 return;
6420 gsi_prev_nondebug (&gsi);
6421 if (gsi_end_p (gsi))
6422 return;
6424 stmt = gsi_stmt (gsi);
6425 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6426 return;
6428 if (stmt != use->stmt)
6429 return;
6431 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6432 return;
6434 if (dump_file && (dump_flags & TDF_DETAILS))
6436 fprintf (dump_file, "Reordering \n");
6437 print_gimple_stmt (dump_file, iv_update, 0, 0);
6438 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6439 fprintf (dump_file, "\n");
6442 gsi = gsi_for_stmt (use->stmt);
6443 gsi_iv = gsi_for_stmt (iv_update);
6444 gsi_move_before (&gsi_iv, &gsi);
6446 cand->pos = IP_BEFORE_USE;
6447 cand->incremented_at = use->stmt;
6450 /* Rewrites USE (address that is an iv) using candidate CAND. */
6452 static void
6453 rewrite_use_address (struct ivopts_data *data,
6454 struct iv_use *use, struct iv_cand *cand)
6456 aff_tree aff;
6457 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6458 tree base_hint = NULL_TREE;
6459 tree ref, iv;
6460 bool ok;
6462 adjust_iv_update_pos (cand, use);
6463 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6464 gcc_assert (ok);
6465 unshare_aff_combination (&aff);
6467 /* To avoid undefined overflow problems, all IV candidates use unsigned
6468 integer types. The drawback is that this makes it impossible for
6469 create_mem_ref to distinguish an IV that is based on a memory object
6470 from one that represents simply an offset.
6472 To work around this problem, we pass a hint to create_mem_ref that
6473 indicates which variable (if any) in aff is an IV based on a memory
6474 object. Note that we only consider the candidate. If this is not
6475 based on an object, the base of the reference is in some subexpression
6476 of the use -- but these will use pointer types, so they are recognized
6477 by the create_mem_ref heuristics anyway. */
6478 if (cand->iv->base_object)
6479 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6481 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6482 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6483 reference_alias_ptr_type (*use->op_p),
6484 iv, base_hint, data->speed);
6485 copy_ref_info (ref, *use->op_p);
6486 *use->op_p = ref;
6489 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6490 candidate CAND. */
6492 static void
6493 rewrite_use_compare (struct ivopts_data *data,
6494 struct iv_use *use, struct iv_cand *cand)
6496 tree comp, *var_p, op, bound;
6497 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6498 enum tree_code compare;
6499 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6500 bool ok;
6502 bound = cp->value;
6503 if (bound)
6505 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6506 tree var_type = TREE_TYPE (var);
6507 gimple_seq stmts;
6509 if (dump_file && (dump_flags & TDF_DETAILS))
6511 fprintf (dump_file, "Replacing exit test: ");
6512 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6514 compare = cp->comp;
6515 bound = unshare_expr (fold_convert (var_type, bound));
6516 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6517 if (stmts)
6518 gsi_insert_seq_on_edge_immediate (
6519 loop_preheader_edge (data->current_loop),
6520 stmts);
6522 gimple_cond_set_lhs (use->stmt, var);
6523 gimple_cond_set_code (use->stmt, compare);
6524 gimple_cond_set_rhs (use->stmt, op);
6525 return;
6528 /* The induction variable elimination failed; just express the original
6529 giv. */
6530 comp = get_computation (data->current_loop, use, cand);
6531 gcc_assert (comp != NULL_TREE);
6533 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6534 gcc_assert (ok);
6536 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6537 true, GSI_SAME_STMT);
6540 /* Rewrites USE using candidate CAND. */
6542 static void
6543 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6545 switch (use->type)
6547 case USE_NONLINEAR_EXPR:
6548 rewrite_use_nonlinear_expr (data, use, cand);
6549 break;
6551 case USE_ADDRESS:
6552 rewrite_use_address (data, use, cand);
6553 break;
6555 case USE_COMPARE:
6556 rewrite_use_compare (data, use, cand);
6557 break;
6559 default:
6560 gcc_unreachable ();
6563 update_stmt (use->stmt);
6566 /* Rewrite the uses using the selected induction variables. */
6568 static void
6569 rewrite_uses (struct ivopts_data *data)
6571 unsigned i;
6572 struct iv_cand *cand;
6573 struct iv_use *use;
6575 for (i = 0; i < n_iv_uses (data); i++)
6577 use = iv_use (data, i);
6578 cand = use->selected;
6579 gcc_assert (cand);
6581 rewrite_use (data, use, cand);
6585 /* Removes the ivs that are not used after rewriting. */
6587 static void
6588 remove_unused_ivs (struct ivopts_data *data)
6590 unsigned j;
6591 bitmap_iterator bi;
6592 bitmap toremove = BITMAP_ALLOC (NULL);
6594 /* Figure out an order in which to release SSA DEFs so that we don't
6595 release something that we'd have to propagate into a debug stmt
6596 afterwards. */
6597 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6599 struct version_info *info;
6601 info = ver_info (data, j);
6602 if (info->iv
6603 && !integer_zerop (info->iv->step)
6604 && !info->inv_id
6605 && !info->iv->have_use_for
6606 && !info->preserve_biv)
6608 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6610 tree def = info->iv->ssa_name;
6612 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6614 imm_use_iterator imm_iter;
6615 use_operand_p use_p;
6616 gimple stmt;
6617 int count = 0;
6619 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6621 if (!gimple_debug_bind_p (stmt))
6622 continue;
6624 /* We just want to determine whether to do nothing
6625 (count == 0), to substitute the computed
6626 expression into a single use of the SSA DEF by
6627 itself (count == 1), or to use a debug temp
6628 because the SSA DEF is used multiple times or as
6629 part of a larger expression (count > 1). */
6630 count++;
6631 if (gimple_debug_bind_get_value (stmt) != def)
6632 count++;
6634 if (count > 1)
6635 BREAK_FROM_IMM_USE_STMT (imm_iter);
6638 if (!count)
6639 continue;
6641 struct iv_use dummy_use;
6642 struct iv_cand *best_cand = NULL, *cand;
6643 unsigned i, best_pref = 0, cand_pref;
6645 memset (&dummy_use, 0, sizeof (dummy_use));
6646 dummy_use.iv = info->iv;
6647 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6649 cand = iv_use (data, i)->selected;
6650 if (cand == best_cand)
6651 continue;
6652 cand_pref = operand_equal_p (cand->iv->step,
6653 info->iv->step, 0)
6654 ? 4 : 0;
6655 cand_pref
6656 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6657 == TYPE_MODE (TREE_TYPE (info->iv->base))
6658 ? 2 : 0;
6659 cand_pref
6660 += TREE_CODE (cand->iv->base) == INTEGER_CST
6661 ? 1 : 0;
6662 if (best_cand == NULL || best_pref < cand_pref)
6664 best_cand = cand;
6665 best_pref = cand_pref;
6669 if (!best_cand)
6670 continue;
6672 tree comp = get_computation_at (data->current_loop,
6673 &dummy_use, best_cand,
6674 SSA_NAME_DEF_STMT (def));
6675 if (!comp)
6676 continue;
6678 if (count > 1)
6680 tree vexpr = make_node (DEBUG_EXPR_DECL);
6681 DECL_ARTIFICIAL (vexpr) = 1;
6682 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6683 if (SSA_NAME_VAR (def))
6684 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6685 else
6686 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6687 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6688 gimple_stmt_iterator gsi;
6690 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6691 gsi = gsi_after_labels (gimple_bb
6692 (SSA_NAME_DEF_STMT (def)));
6693 else
6694 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6696 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6697 comp = vexpr;
6700 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6702 if (!gimple_debug_bind_p (stmt))
6703 continue;
6705 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6706 SET_USE (use_p, comp);
6708 update_stmt (stmt);
6714 release_defs_bitset (toremove);
6716 BITMAP_FREE (toremove);
6719 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6720 for hash_map::traverse. */
6722 bool
6723 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6725 free (value);
6726 return true;
6729 /* Frees data allocated by the optimization of a single loop. */
6731 static void
6732 free_loop_data (struct ivopts_data *data)
6734 unsigned i, j;
6735 bitmap_iterator bi;
6736 tree obj;
6738 if (data->niters)
6740 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6741 delete data->niters;
6742 data->niters = NULL;
6745 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6747 struct version_info *info;
6749 info = ver_info (data, i);
6750 free (info->iv);
6751 info->iv = NULL;
6752 info->has_nonlin_use = false;
6753 info->preserve_biv = false;
6754 info->inv_id = 0;
6756 bitmap_clear (data->relevant);
6757 bitmap_clear (data->important_candidates);
6759 for (i = 0; i < n_iv_uses (data); i++)
6761 struct iv_use *use = iv_use (data, i);
6763 free (use->iv);
6764 BITMAP_FREE (use->related_cands);
6765 for (j = 0; j < use->n_map_members; j++)
6766 if (use->cost_map[j].depends_on)
6767 BITMAP_FREE (use->cost_map[j].depends_on);
6768 free (use->cost_map);
6769 free (use);
6771 data->iv_uses.truncate (0);
6773 for (i = 0; i < n_iv_cands (data); i++)
6775 struct iv_cand *cand = iv_cand (data, i);
6777 free (cand->iv);
6778 if (cand->depends_on)
6779 BITMAP_FREE (cand->depends_on);
6780 free (cand);
6782 data->iv_candidates.truncate (0);
6784 if (data->version_info_size < num_ssa_names)
6786 data->version_info_size = 2 * num_ssa_names;
6787 free (data->version_info);
6788 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6791 data->max_inv_id = 0;
6793 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6794 SET_DECL_RTL (obj, NULL_RTX);
6796 decl_rtl_to_reset.truncate (0);
6798 data->inv_expr_tab->empty ();
6799 data->inv_expr_id = 0;
6802 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6803 loop tree. */
6805 static void
6806 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6808 free_loop_data (data);
6809 free (data->version_info);
6810 BITMAP_FREE (data->relevant);
6811 BITMAP_FREE (data->important_candidates);
6813 decl_rtl_to_reset.release ();
6814 data->iv_uses.release ();
6815 data->iv_candidates.release ();
6816 delete data->inv_expr_tab;
6817 data->inv_expr_tab = NULL;
6820 /* Returns true if the loop body BODY includes any function calls. */
6822 static bool
6823 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6825 gimple_stmt_iterator gsi;
6826 unsigned i;
6828 for (i = 0; i < num_nodes; i++)
6829 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6831 gimple stmt = gsi_stmt (gsi);
6832 if (is_gimple_call (stmt)
6833 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6834 return true;
6836 return false;
6839 /* Optimizes the LOOP. Returns true if anything changed. */
6841 static bool
6842 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6844 bool changed = false;
6845 struct iv_ca *iv_ca;
6846 edge exit = single_dom_exit (loop);
6847 basic_block *body;
6849 gcc_assert (!data->niters);
6850 data->current_loop = loop;
6851 data->speed = optimize_loop_for_speed_p (loop);
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6855 fprintf (dump_file, "Processing loop %d\n", loop->num);
6857 if (exit)
6859 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6860 exit->src->index, exit->dest->index);
6861 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6862 fprintf (dump_file, "\n");
6865 fprintf (dump_file, "\n");
6868 body = get_loop_body (loop);
6869 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6870 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6871 free (body);
6873 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6875 /* For each ssa name determines whether it behaves as an induction variable
6876 in some loop. */
6877 if (!find_induction_variables (data))
6878 goto finish;
6880 /* Finds interesting uses (item 1). */
6881 find_interesting_uses (data);
6882 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6883 goto finish;
6885 /* Finds candidates for the induction variables (item 2). */
6886 find_iv_candidates (data);
6888 /* Calculates the costs (item 3, part 1). */
6889 determine_iv_costs (data);
6890 determine_use_iv_costs (data);
6891 determine_set_costs (data);
6893 /* Find the optimal set of induction variables (item 3, part 2). */
6894 iv_ca = find_optimal_iv_set (data);
6895 if (!iv_ca)
6896 goto finish;
6897 changed = true;
6899 /* Create the new induction variables (item 4, part 1). */
6900 create_new_ivs (data, iv_ca);
6901 iv_ca_free (&iv_ca);
6903 /* Rewrite the uses (item 4, part 2). */
6904 rewrite_uses (data);
6906 /* Remove the ivs that are unused after rewriting. */
6907 remove_unused_ivs (data);
6909 /* We have changed the structure of induction variables; it might happen
6910 that definitions in the scev database refer to some of them that were
6911 eliminated. */
6912 scev_reset ();
6914 finish:
6915 free_loop_data (data);
6917 return changed;
6920 /* Main entry point. Optimizes induction variables in loops. */
6922 void
6923 tree_ssa_iv_optimize (void)
6925 struct loop *loop;
6926 struct ivopts_data data;
6928 tree_ssa_iv_optimize_init (&data);
6930 /* Optimize the loops starting with the innermost ones. */
6931 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6933 if (dump_file && (dump_flags & TDF_DETAILS))
6934 flow_loop_dump (loop, dump_file, NULL, 1);
6936 tree_ssa_iv_optimize_loop (&data, loop);
6939 tree_ssa_iv_optimize_finalize (&data);