/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
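/* A rough illustration (an example of mine, not from the original
   sources): in a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   the variable i is a biv, the address computation &a[i] is an address
   use, and i < n is a compare use.  A likely outcome is a pointer
   candidate p starting at &a[0] and stepping by sizeof (*a): the address
   use then becomes *p (strength reduction), the exit test becomes a
   comparison of p against &a[n], and i itself can be eliminated.  */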
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
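/* A note of mine for readers (not part of the original sources): when
   two comp_cost values are compared later in this file, the runtime
   COST field is compared first and COMPLEXITY only breaks ties, so
   {4, 2} is considered cheaper than {5, 0}.  */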
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
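/* Illustration (mine, not from the original sources): for a loop

     for (i = 0; i < n; i++)
       body;

   an IP_NORMAL candidate is incremented just before the "i < n" exit
   test, an IP_END candidate in the latch block after it, and the
   IP_BEFORE_USE/IP_AFTER_USE positions tie the increment to one
   specific use so that auto-increment addressing modes can be formed.  */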
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
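/* A worked example of mine (not from the original sources): for
   EXPR = &a[i] the base object is the address of the declaration "a"
   (via the ADDR_EXPR case); for a pointer expression p_1 + 4 * i_2 it
   is p_1 itself (via POINTER_PLUS_EXPR); and for any non-pointer
   expression NULL_TREE is returned, meaning the iv does not track a
   memory object at all.  */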
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
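/* Illustration (mine, not from the original sources): for the header PHI

     i_1 = PHI <0 (preheader), i_7 (latch)>

   where the latch computes i_7 = i_1 + 1, simple_iv succeeds and the
   step 1 is returned; a PHI whose value does not advance each iteration
   (step 0) yields NULL_TREE, since it is an invariant rather than a
   biv.  */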
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
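/* Example of mine (not from the original sources): for the exit test
   "if (i_1 < n_2)" where i_1 is an iv with nonzero step and n_2 is loop
   invariant, *CONTROL_VAR is set to point to i_1 and *BOUND to n_2, and
   true is returned.  For "if (a_3 != b_4)" with two invariants, the
   operand locations are still stored but false is returned.  */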
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
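/* A worked example of mine (not from the original sources): for a use
   a[j_5] where j_5 = 2 * i_1 + 3, with 4-byte array elements and i_1
   stepping by 1, the iv found for the index has step 2; multiplying in
   the element size accumulates 8 into DTA->step, and the index in BASE
   is replaced by the iv's initial value.  */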
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
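/* Examples of mine (not from the original sources): for TOP = i_1 * 12
   and BOT = i_1 this stores 12 and returns true; for TOP = 24 and
   BOT = 8 it stores 3; but for TOP = i_1 * 12 and BOT = i_1 * 4 it
   returns false, because BOT is only matched as a whole operand, not
   factored out of TOP.  */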
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2061 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2062 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2063 we are at the top-level of the processed address. */
2065 static tree
2066 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2067 HOST_WIDE_INT *offset)
2069 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2070 enum tree_code code;
2071 tree type, orig_type = TREE_TYPE (expr);
2072 HOST_WIDE_INT off0, off1, st;
2073 tree orig_expr = expr;
2075 STRIP_NOPS (expr);
2077 type = TREE_TYPE (expr);
2078 code = TREE_CODE (expr);
2079 *offset = 0;
2081 switch (code)
2083 case INTEGER_CST:
2084 if (!cst_and_fits_in_hwi (expr)
2085 || integer_zerop (expr))
2086 return orig_expr;
2088 *offset = int_cst_value (expr);
2089 return build_int_cst (orig_type, 0);
2091 case POINTER_PLUS_EXPR:
2092 case PLUS_EXPR:
2093 case MINUS_EXPR:
2094 op0 = TREE_OPERAND (expr, 0);
2095 op1 = TREE_OPERAND (expr, 1);
2097 op0 = strip_offset_1 (op0, false, false, &off0);
2098 op1 = strip_offset_1 (op1, false, false, &off1);
2100 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2101 if (op0 == TREE_OPERAND (expr, 0)
2102 && op1 == TREE_OPERAND (expr, 1))
2103 return orig_expr;
2105 if (integer_zerop (op1))
2106 expr = op0;
2107 else if (integer_zerop (op0))
2109 if (code == MINUS_EXPR)
2110 expr = fold_build1 (NEGATE_EXPR, type, op1);
2111 else
2112 expr = op1;
2114 else
2115 expr = fold_build2 (code, type, op0, op1);
2117 return fold_convert (orig_type, expr);
2119 case MULT_EXPR:
2120 op1 = TREE_OPERAND (expr, 1);
2121 if (!cst_and_fits_in_hwi (op1))
2122 return orig_expr;
2124 op0 = TREE_OPERAND (expr, 0);
2125 op0 = strip_offset_1 (op0, false, false, &off0);
2126 if (op0 == TREE_OPERAND (expr, 0))
2127 return orig_expr;
2129 *offset = off0 * int_cst_value (op1);
2130 if (integer_zerop (op0))
2131 expr = op0;
2132 else
2133 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2135 return fold_convert (orig_type, expr);
2137 case ARRAY_REF:
2138 case ARRAY_RANGE_REF:
2139 if (!inside_addr)
2140 return orig_expr;
2142 step = array_ref_element_size (expr);
2143 if (!cst_and_fits_in_hwi (step))
2144 break;
2146 st = int_cst_value (step);
2147 op1 = TREE_OPERAND (expr, 1);
2148 op1 = strip_offset_1 (op1, false, false, &off1);
2149 *offset = off1 * st;
2151 if (top_compref
2152 && integer_zerop (op1))
2154 /* Strip the component reference completely. */
2155 op0 = TREE_OPERAND (expr, 0);
2156 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2157 *offset += off0;
2158 return op0;
2160 break;
2162 case COMPONENT_REF:
2164 tree field;
2166 if (!inside_addr)
2167 return orig_expr;
2169 tmp = component_ref_field_offset (expr);
2170 field = TREE_OPERAND (expr, 1);
2171 if (top_compref
2172 && cst_and_fits_in_hwi (tmp)
2173 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2175 HOST_WIDE_INT boffset, abs_off;
2177 /* Strip the component reference completely. */
2178 op0 = TREE_OPERAND (expr, 0);
2179 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2180 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2181 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2182 if (boffset < 0)
2183 abs_off = -abs_off;
2185 *offset = off0 + int_cst_value (tmp) + abs_off;
2186 return op0;
2189 break;
2191 case ADDR_EXPR:
2192 op0 = TREE_OPERAND (expr, 0);
2193 op0 = strip_offset_1 (op0, true, true, &off0);
2194 *offset += off0;
2196 if (op0 == TREE_OPERAND (expr, 0))
2197 return orig_expr;
2199 expr = build_fold_addr_expr (op0);
2200 return fold_convert (orig_type, expr);
2202 case MEM_REF:
2203 /* ??? Offset operand? */
2204 inside_addr = false;
2205 break;
2207 default:
2208 return orig_expr;
2211 /* Default handling of expressions for which we want to recurse into
2212 the first operand. */
2213 op0 = TREE_OPERAND (expr, 0);
2214 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2215 *offset += off0;
2217 if (op0 == TREE_OPERAND (expr, 0)
2218 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2219 return orig_expr;
2221 expr = copy_node (expr);
2222 TREE_OPERAND (expr, 0) = op0;
2223 if (op1)
2224 TREE_OPERAND (expr, 1) = op1;
2226 /* Inside an address, we might strip the top-level component references,
2227 thus changing the type of the expression. Handling of ADDR_EXPR
2228 will fix that. */
2229 expr = fold_convert (orig_type, expr);
2231 return expr;
2234 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2236 static tree
2237 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2239 HOST_WIDE_INT off;
2240 tree core = strip_offset_1 (expr, false, false, &off);
2241 *offset = off;
2242 return core;
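/* A hypothetical example: for EXPR == &a[i + 4], where a is an array of
   4-byte integers, the returned core is &a[i] and *OFFSET is set to 16;
   for EXPR == p + 6 with a pointer p, the core is p and *OFFSET is 6.  */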
2245 /* Returns a variant of TYPE that can be used as a base for different uses.
2246 We return an unsigned type with the same precision, which avoids problems
2247 with overflows. */
2249 static tree
2250 generic_type_for (tree type)
2252 if (POINTER_TYPE_P (type))
2253 return unsigned_type_for (type);
2255 if (TYPE_UNSIGNED (type))
2256 return type;
2258 return unsigned_type_for (type);
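/* E.g. generic_type_for (int) is unsigned int, and for a pointer type it
   is the unsigned integer type of the same precision, so arithmetic on
   the candidate cannot invoke undefined overflow.  */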
2261 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2262 the bitmap in which we should store them. */
2264 static struct ivopts_data *fd_ivopts_data;
2265 static tree
2266 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2268 bitmap *depends_on = (bitmap *) data;
2269 struct version_info *info;
2271 if (TREE_CODE (*expr_p) != SSA_NAME)
2272 return NULL_TREE;
2273 info = name_info (fd_ivopts_data, *expr_p);
2275 if (!info->inv_id || info->has_nonlin_use)
2276 return NULL_TREE;
2278 if (!*depends_on)
2279 *depends_on = BITMAP_ALLOC (NULL);
2280 bitmap_set_bit (*depends_on, info->inv_id);
2282 return NULL_TREE;
2285 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2286 position to POS. If USE is not NULL, the candidate is set as related to
2287 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2288 replacement of the final value of the iv by a direct computation. */
2290 static struct iv_cand *
2291 add_candidate_1 (struct ivopts_data *data,
2292 tree base, tree step, bool important, enum iv_position pos,
2293 struct iv_use *use, gimple incremented_at)
2295 unsigned i;
2296 struct iv_cand *cand = NULL;
2297 tree type, orig_type;
2299 /* For non-original variables, make sure their values are computed in a type
2300 that does not invoke undefined behavior on overflows (since in general,
2301 we cannot prove that these induction variables are non-wrapping). */
2302 if (pos != IP_ORIGINAL)
2304 orig_type = TREE_TYPE (base);
2305 type = generic_type_for (orig_type);
2306 if (type != orig_type)
2308 base = fold_convert (type, base);
2309 step = fold_convert (type, step);
2313 for (i = 0; i < n_iv_cands (data); i++)
2315 cand = iv_cand (data, i);
2317 if (cand->pos != pos)
2318 continue;
2320 if (cand->incremented_at != incremented_at
2321 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2322 && cand->ainc_use != use))
2323 continue;
2325 if (!cand->iv)
2327 if (!base && !step)
2328 break;
2330 continue;
2333 if (!base && !step)
2334 continue;
2336 if (operand_equal_p (base, cand->iv->base, 0)
2337 && operand_equal_p (step, cand->iv->step, 0)
2338 && (TYPE_PRECISION (TREE_TYPE (base))
2339 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2340 break;
2343 if (i == n_iv_cands (data))
2345 cand = XCNEW (struct iv_cand);
2346 cand->id = i;
2348 if (!base && !step)
2349 cand->iv = NULL;
2350 else
2351 cand->iv = alloc_iv (base, step);
2353 cand->pos = pos;
2354 if (pos != IP_ORIGINAL && cand->iv)
2356 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2357 cand->var_after = cand->var_before;
2359 cand->important = important;
2360 cand->incremented_at = incremented_at;
2361 data->iv_candidates.safe_push (cand);
2363 if (step
2364 && TREE_CODE (step) != INTEGER_CST)
2366 fd_ivopts_data = data;
2367 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2370 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2371 cand->ainc_use = use;
2372 else
2373 cand->ainc_use = NULL;
2375 if (dump_file && (dump_flags & TDF_DETAILS))
2376 dump_cand (dump_file, cand);
2379 if (important && !cand->important)
2381 cand->important = true;
2382 if (dump_file && (dump_flags & TDF_DETAILS))
2383 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2386 if (use)
2388 bitmap_set_bit (use->related_cands, i);
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 fprintf (dump_file, "Candidate %d is related to use %d\n",
2391 cand->id, use->id);
2394 return cand;
2397 /* Returns true if incrementing the induction variable at the end of the LOOP
2398 is allowed.
2400 The purpose is to avoid splitting the latch edge with a biv increment, thus
2401 creating a jump, possibly confusing other optimization passes and leaving
2402 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2403 is not available (so we do not have a better alternative), or if the latch
2404 edge is already nonempty. */
2406 static bool
2407 allow_ip_end_pos_p (struct loop *loop)
2409 if (!ip_normal_pos (loop))
2410 return true;
2412 if (!empty_block_p (ip_end_pos (loop)))
2413 return true;
2415 return false;
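/* E.g. in a do-while style loop the latch block typically already
   contains statements, so placing the increment there (IP_END) creates
   no new jump; with an empty latch we insist on IP_NORMAL if it is
   available.  */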
2418 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2419 Important field is set to IMPORTANT. */
2421 static void
2422 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2423 bool important, struct iv_use *use)
2425 basic_block use_bb = gimple_bb (use->stmt);
2426 enum machine_mode mem_mode;
2427 unsigned HOST_WIDE_INT cstepi;
2429 /* If we insert the increment in any position other than the standard
2430 ones, we must ensure that it is incremented once per iteration.
2431 It must not be in an inner nested loop, or one side of an if
2432 statement. */
2433 if (use_bb->loop_father != data->current_loop
2434 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2435 || stmt_could_throw_p (use->stmt)
2436 || !cst_and_fits_in_hwi (step))
2437 return;
2439 cstepi = int_cst_value (step);
2441 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2442 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2443 || USE_STORE_PRE_INCREMENT (mem_mode))
2444 && GET_MODE_SIZE (mem_mode) == cstepi)
2445 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2446 || USE_STORE_PRE_DECREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == -cstepi))
2449 enum tree_code code = MINUS_EXPR;
2450 tree new_base;
2451 tree new_step = step;
2453 if (POINTER_TYPE_P (TREE_TYPE (base)))
2455 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2456 code = POINTER_PLUS_EXPR;
2458 else
2459 new_step = fold_convert (TREE_TYPE (base), new_step);
2460 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2461 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2462 use->stmt);
2464 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2465 || USE_STORE_POST_INCREMENT (mem_mode))
2466 && GET_MODE_SIZE (mem_mode) == cstepi)
2467 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2468 || USE_STORE_POST_DECREMENT (mem_mode))
2469 && GET_MODE_SIZE (mem_mode) == -cstepi))
2471 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2472 use->stmt);
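/* A hypothetical example: for a use loading a 4-byte value with
   STEP == 4 on a target with post-increment addressing, the IP_AFTER_USE
   candidate added here lets the memory access and the induction variable
   increment combine into a single post-increment access.  */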
2476 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2477 position to POS. If USE is not NULL, the candidate is set as related to
2478 it. The candidate computation is scheduled on all available positions. */
2480 static void
2481 add_candidate (struct ivopts_data *data,
2482 tree base, tree step, bool important, struct iv_use *use)
2484 if (ip_normal_pos (data->current_loop))
2485 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2486 if (ip_end_pos (data->current_loop)
2487 && allow_ip_end_pos_p (data->current_loop))
2488 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2490 if (use != NULL && use->type == USE_ADDRESS)
2491 add_autoinc_candidates (data, base, step, important, use);
2494 /* Adds standard iv candidates. */
2496 static void
2497 add_standard_iv_candidates (struct ivopts_data *data)
2499 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2501 /* The same for the wider long type if it is still fast enough. */
2502 if (TYPE_PRECISION
2503 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2504 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2505 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2506 build_int_cst (long_integer_type_node, 1), true, NULL);
2508 /* The same for the even wider long long type if it is still fast enough. */
2509 if (TYPE_PRECISION
2510 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2511 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2512 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2513 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2517 /* Adds candidates based on the old induction variable IV. */
2519 static void
2520 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2522 gimple phi;
2523 tree def;
2524 struct iv_cand *cand;
2526 add_candidate (data, iv->base, iv->step, true, NULL);
2528 /* The same, but with initial value zero. */
2529 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2530 add_candidate (data, size_int (0), iv->step, true, NULL);
2531 else
2532 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2533 iv->step, true, NULL);
2535 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2536 if (gimple_code (phi) == GIMPLE_PHI)
2538 /* Additionally record the possibility of leaving the original iv
2539 untouched. */
2540 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2541 /* Don't add candidate if it's from another PHI node because
2542 it's an affine iv appearing in the form of PEELED_CHREC. */
2543 phi = SSA_NAME_DEF_STMT (def);
2544 if (gimple_code (phi) != GIMPLE_PHI)
2546 cand = add_candidate_1 (data,
2547 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2548 SSA_NAME_DEF_STMT (def));
2549 cand->var_before = iv->ssa_name;
2550 cand->var_after = def;
2552 else
2553 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2557 /* Adds candidates based on the old induction variables. */
2559 static void
2560 add_old_ivs_candidates (struct ivopts_data *data)
2562 unsigned i;
2563 struct iv *iv;
2564 bitmap_iterator bi;
2566 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2568 iv = ver_info (data, i)->iv;
2569 if (iv && iv->biv_p && !integer_zerop (iv->step))
2570 add_old_iv_candidates (data, iv);
2574 /* Adds candidates based on the value of the induction variable IV and USE. */
2576 static void
2577 add_iv_value_candidates (struct ivopts_data *data,
2578 struct iv *iv, struct iv_use *use)
2580 unsigned HOST_WIDE_INT offset;
2581 tree base;
2582 tree basetype;
2584 add_candidate (data, iv->base, iv->step, false, use);
2586 /* The same, but with initial value zero. Make such a variable important,
2587 since it is generic enough that possibly many uses may be based
2588 on it. */
2589 basetype = TREE_TYPE (iv->base);
2590 if (POINTER_TYPE_P (basetype))
2591 basetype = sizetype;
2592 add_candidate (data, build_int_cst (basetype, 0),
2593 iv->step, true, use);
2595 /* Third, try removing the constant offset. Make sure to even
2596 add a candidate for &a[0] vs. (T *)&a. */
2597 base = strip_offset (iv->base, &offset);
2598 if (offset
2599 || base != iv->base)
2600 add_candidate (data, base, iv->step, false, use);
2603 /* Adds candidates based on the uses. */
2605 static void
2606 add_derived_ivs_candidates (struct ivopts_data *data)
2608 unsigned i;
2610 for (i = 0; i < n_iv_uses (data); i++)
2612 struct iv_use *use = iv_use (data, i);
2614 if (!use)
2615 continue;
2617 switch (use->type)
2619 case USE_NONLINEAR_EXPR:
2620 case USE_COMPARE:
2621 case USE_ADDRESS:
2622 /* Just add the ivs based on the value of the iv used here. */
2623 add_iv_value_candidates (data, use->iv, use);
2624 break;
2626 default:
2627 gcc_unreachable ();
2632 /* Record important candidates and add them to related_cands bitmaps
2633 if needed. */
2635 static void
2636 record_important_candidates (struct ivopts_data *data)
2638 unsigned i;
2639 struct iv_use *use;
2641 for (i = 0; i < n_iv_cands (data); i++)
2643 struct iv_cand *cand = iv_cand (data, i);
2645 if (cand->important)
2646 bitmap_set_bit (data->important_candidates, i);
2649 data->consider_all_candidates = (n_iv_cands (data)
2650 <= CONSIDER_ALL_CANDIDATES_BOUND);
2652 if (data->consider_all_candidates)
2654 /* We will not need "related_cands" bitmaps in this case,
2655 so release them to decrease peak memory consumption. */
2656 for (i = 0; i < n_iv_uses (data); i++)
2658 use = iv_use (data, i);
2659 BITMAP_FREE (use->related_cands);
2662 else
2664 /* Add important candidates to the related_cands bitmaps. */
2665 for (i = 0; i < n_iv_uses (data); i++)
2666 bitmap_ior_into (iv_use (data, i)->related_cands,
2667 data->important_candidates);
2671 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2672 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2673 we allocate a simple list for every use. */
2675 static void
2676 alloc_use_cost_map (struct ivopts_data *data)
2678 unsigned i, size, s;
2680 for (i = 0; i < n_iv_uses (data); i++)
2682 struct iv_use *use = iv_use (data, i);
2684 if (data->consider_all_candidates)
2685 size = n_iv_cands (data);
2686 else
2688 s = bitmap_count_bits (use->related_cands);
2690 /* Round up to a power of two, so that taking the modulo by it is fast. */
2691 size = s ? (1 << ceil_log2 (s)) : 1;
2694 use->n_map_members = size;
2695 use->cost_map = XCNEWVEC (struct cost_pair, size);
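/* E.g. a use with 5 related candidates gets n_map_members == 8, so the
   lookup code below can compute the modulo as "cand->id & 7" instead of
   using a division.  */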
2699 /* Returns a description of the computation cost of an expression whose
2700 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2702 static comp_cost
2703 new_cost (unsigned runtime, unsigned complexity)
2705 comp_cost cost;
2707 cost.cost = runtime;
2708 cost.complexity = complexity;
2710 return cost;
2713 /* Adds costs COST1 and COST2. */
2715 static comp_cost
2716 add_costs (comp_cost cost1, comp_cost cost2)
2718 cost1.cost += cost2.cost;
2719 cost1.complexity += cost2.complexity;
2721 return cost1;
2723 /* Subtracts cost COST2 from cost COST1. */
2725 static comp_cost
2726 sub_costs (comp_cost cost1, comp_cost cost2)
2728 cost1.cost -= cost2.cost;
2729 cost1.complexity -= cost2.complexity;
2731 return cost1;
2734 /* Returns a negative number if COST1 < COST2, a positive number if
2735 COST1 > COST2, and 0 if COST1 = COST2. */
2737 static int
2738 compare_costs (comp_cost cost1, comp_cost cost2)
2740 if (cost1.cost == cost2.cost)
2741 return cost1.complexity - cost2.complexity;
2743 return cost1.cost - cost2.cost;
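/* E.g. costs {4, 1} and {4, 2} compare as less than: equal runtime
   costs are ordered by complexity, otherwise the runtime cost alone
   decides.  */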
2746 /* Returns true if COST is infinite. */
2748 static bool
2749 infinite_cost_p (comp_cost cost)
2751 return cost.cost == INFTY;
2754 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2755 on invariants DEPENDS_ON and that the value used in expressing it
2756 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2758 static void
2759 set_use_iv_cost (struct ivopts_data *data,
2760 struct iv_use *use, struct iv_cand *cand,
2761 comp_cost cost, bitmap depends_on, tree value,
2762 enum tree_code comp, int inv_expr_id)
2764 unsigned i, s;
2766 if (infinite_cost_p (cost))
2768 BITMAP_FREE (depends_on);
2769 return;
2772 if (data->consider_all_candidates)
2774 use->cost_map[cand->id].cand = cand;
2775 use->cost_map[cand->id].cost = cost;
2776 use->cost_map[cand->id].depends_on = depends_on;
2777 use->cost_map[cand->id].value = value;
2778 use->cost_map[cand->id].comp = comp;
2779 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2780 return;
2783 /* n_map_members is a power of two, so this computes modulo. */
2784 s = cand->id & (use->n_map_members - 1);
2785 for (i = s; i < use->n_map_members; i++)
2786 if (!use->cost_map[i].cand)
2787 goto found;
2788 for (i = 0; i < s; i++)
2789 if (!use->cost_map[i].cand)
2790 goto found;
2792 gcc_unreachable ();
2794 found:
2795 use->cost_map[i].cand = cand;
2796 use->cost_map[i].cost = cost;
2797 use->cost_map[i].depends_on = depends_on;
2798 use->cost_map[i].value = value;
2799 use->cost_map[i].comp = comp;
2800 use->cost_map[i].inv_expr_id = inv_expr_id;
2803 /* Gets cost of (USE, CANDIDATE) pair. */
2805 static struct cost_pair *
2806 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2807 struct iv_cand *cand)
2809 unsigned i, s;
2810 struct cost_pair *ret;
2812 if (!cand)
2813 return NULL;
2815 if (data->consider_all_candidates)
2817 ret = use->cost_map + cand->id;
2818 if (!ret->cand)
2819 return NULL;
2821 return ret;
2824 /* n_map_members is a power of two, so this computes modulo. */
2825 s = cand->id & (use->n_map_members - 1);
2826 for (i = s; i < use->n_map_members; i++)
2827 if (use->cost_map[i].cand == cand)
2828 return use->cost_map + i;
2829 else if (use->cost_map[i].cand == NULL)
2830 return NULL;
2831 for (i = 0; i < s; i++)
2832 if (use->cost_map[i].cand == cand)
2833 return use->cost_map + i;
2834 else if (use->cost_map[i].cand == NULL)
2835 return NULL;
2837 return NULL;
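/* The two functions above implement a simple open-addressing hash table:
   probing starts at cand->id modulo the power-of-two table size, walks
   forward and wraps around once; a NULL cand field terminates the probe
   sequence.  */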
2840 /* Returns an estimate of the cost of computing SEQ. */
2842 static unsigned
2843 seq_cost (rtx seq, bool speed)
2845 unsigned cost = 0;
2846 rtx set;
2848 for (; seq; seq = NEXT_INSN (seq))
2850 set = single_set (seq);
2851 if (set)
2852 cost += set_src_cost (SET_SRC (set), speed);
2853 else
2854 cost++;
2857 return cost;
2860 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2861 static rtx
2862 produce_memory_decl_rtl (tree obj, int *regno)
2864 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2865 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2866 rtx x;
2868 gcc_assert (obj);
2869 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2871 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2872 x = gen_rtx_SYMBOL_REF (address_mode, name);
2873 SET_SYMBOL_REF_DECL (x, obj);
2874 x = gen_rtx_MEM (DECL_MODE (obj), x);
2875 set_mem_addr_space (x, as);
2876 targetm.encode_section_info (obj, x, true);
2878 else
2880 x = gen_raw_REG (address_mode, (*regno)++);
2881 x = gen_rtx_MEM (DECL_MODE (obj), x);
2882 set_mem_addr_space (x, as);
2885 return x;
2888 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2889 walk_tree. DATA contains the actual fake register number. */
2891 static tree
2892 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2894 tree obj = NULL_TREE;
2895 rtx x = NULL_RTX;
2896 int *regno = (int *) data;
2898 switch (TREE_CODE (*expr_p))
2900 case ADDR_EXPR:
2901 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2902 handled_component_p (*expr_p);
2903 expr_p = &TREE_OPERAND (*expr_p, 0))
2904 continue;
2905 obj = *expr_p;
2906 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2907 x = produce_memory_decl_rtl (obj, regno);
2908 break;
2910 case SSA_NAME:
2911 *ws = 0;
2912 obj = SSA_NAME_VAR (*expr_p);
2913 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2914 if (!obj)
2915 return NULL_TREE;
2916 if (!DECL_RTL_SET_P (obj))
2917 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2918 break;
2920 case VAR_DECL:
2921 case PARM_DECL:
2922 case RESULT_DECL:
2923 *ws = 0;
2924 obj = *expr_p;
2926 if (DECL_RTL_SET_P (obj))
2927 break;
2929 if (DECL_MODE (obj) == BLKmode)
2930 x = produce_memory_decl_rtl (obj, regno);
2931 else
2932 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2934 break;
2936 default:
2937 break;
2940 if (x)
2942 decl_rtl_to_reset.safe_push (obj);
2943 SET_DECL_RTL (obj, x);
2946 return NULL_TREE;
2949 /* Determines cost of the computation of EXPR. */
2951 static unsigned
2952 computation_cost (tree expr, bool speed)
2954 rtx seq, rslt;
2955 tree type = TREE_TYPE (expr);
2956 unsigned cost;
2957 /* Avoid using hard regs in ways which may be unsupported. */
2958 int regno = LAST_VIRTUAL_REGISTER + 1;
2959 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2960 enum node_frequency real_frequency = node->frequency;
2962 node->frequency = NODE_FREQUENCY_NORMAL;
2963 crtl->maybe_hot_insn_p = speed;
2964 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2965 start_sequence ();
2966 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2967 seq = get_insns ();
2968 end_sequence ();
2969 default_rtl_profile ();
2970 node->frequency = real_frequency;
2972 cost = seq_cost (seq, speed);
2973 if (MEM_P (rslt))
2974 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2975 TYPE_ADDR_SPACE (type), speed);
2976 else if (!REG_P (rslt))
2977 cost += set_src_cost (rslt, speed);
2979 return cost;
2982 /* Returns variable containing the value of candidate CAND at statement AT. */
2984 static tree
2985 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2987 if (stmt_after_increment (loop, cand, stmt))
2988 return cand->var_after;
2989 else
2990 return cand->var_before;
2993 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2994 same precision that is at least as wide as the precision of TYPE, stores
2995 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2996 type of A and B. */
2998 static tree
2999 determine_common_wider_type (tree *a, tree *b)
3001 tree wider_type = NULL;
3002 tree suba, subb;
3003 tree atype = TREE_TYPE (*a);
3005 if (CONVERT_EXPR_P (*a))
3007 suba = TREE_OPERAND (*a, 0);
3008 wider_type = TREE_TYPE (suba);
3009 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010 return atype;
3012 else
3013 return atype;
3015 if (CONVERT_EXPR_P (*b))
3017 subb = TREE_OPERAND (*b, 0);
3018 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3019 return atype;
3021 else
3022 return atype;
3024 *a = suba;
3025 *b = subb;
3026 return wider_type;
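/* A hypothetical example: for *A == (int) la and *B == (int) lb, with la
   and lb of a 64-bit long type, *A and *B are replaced by la and lb and
   the long type is returned, so the difference can be folded in the
   wider type.  */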
3029 /* Determines the expression by which USE is expressed from induction variable
3030 CAND at statement AT in LOOP. The expression is stored in a decomposed
3031 form into AFF. Returns false if USE cannot be expressed using CAND. */
3033 static bool
3034 get_computation_aff (struct loop *loop,
3035 struct iv_use *use, struct iv_cand *cand, gimple at,
3036 struct aff_tree *aff)
3038 tree ubase = use->iv->base;
3039 tree ustep = use->iv->step;
3040 tree cbase = cand->iv->base;
3041 tree cstep = cand->iv->step, cstep_common;
3042 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3043 tree common_type, var;
3044 tree uutype;
3045 aff_tree cbase_aff, var_aff;
3046 widest_int rat;
3048 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3050 /* We do not have a precision to express the values of use. */
3051 return false;
3054 var = var_at_stmt (loop, cand, at);
3055 uutype = unsigned_type_for (utype);
3057 /* If the conversion is not noop, perform it. */
3058 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3060 cstep = fold_convert (uutype, cstep);
3061 cbase = fold_convert (uutype, cbase);
3062 var = fold_convert (uutype, var);
3065 if (!constant_multiple_of (ustep, cstep, &rat))
3066 return false;
3068 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3069 type, we achieve better folding by computing their difference in this
3070 wider type, and casting the result to UUTYPE. We do not need to worry
3071 about overflows, as all the arithmetic will in the end be performed in
3072 UUTYPE anyway. */
3073 common_type = determine_common_wider_type (&ubase, &cbase);
3075 /* use = ubase - ratio * cbase + ratio * var. */
3076 tree_to_aff_combination (ubase, common_type, aff);
3077 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3078 tree_to_aff_combination (var, uutype, &var_aff);
3080 /* We need to shift the value if we are after the increment. */
3081 if (stmt_after_increment (loop, cand, at))
3083 aff_tree cstep_aff;
3085 if (common_type != uutype)
3086 cstep_common = fold_convert (common_type, cstep);
3087 else
3088 cstep_common = cstep;
3090 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3091 aff_combination_add (&cbase_aff, &cstep_aff);
3094 aff_combination_scale (&cbase_aff, -rat);
3095 aff_combination_add (aff, &cbase_aff);
3096 if (common_type != uutype)
3097 aff_combination_convert (aff, uutype);
3099 aff_combination_scale (&var_aff, rat);
3100 aff_combination_add (aff, &var_aff);
3102 return true;
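/* A worked example: for a use with base 4 and step 4 and a candidate
   with base 0 and step 1, constant_multiple_of yields RAT == 4 and the
   resulting combination is use = 4 - 4 * 0 + 4 * var, i.e. 4 + 4 * var.  */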
3105 /* Return the type of USE. */
3107 static tree
3108 get_use_type (struct iv_use *use)
3110 tree base_type = TREE_TYPE (use->iv->base);
3111 tree type;
3113 if (use->type == USE_ADDRESS)
3115 /* The base_type may be a void pointer. Create a pointer type based on
3116 the mem_ref instead. */
3117 type = build_pointer_type (TREE_TYPE (*use->op_p));
3118 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3119 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3121 else
3122 type = base_type;
3124 return type;
3127 /* Determines the expression by which USE is expressed from induction variable
3128 CAND at statement AT in LOOP. The computation is unshared. */
3130 static tree
3131 get_computation_at (struct loop *loop,
3132 struct iv_use *use, struct iv_cand *cand, gimple at)
3134 aff_tree aff;
3135 tree type = get_use_type (use);
3137 if (!get_computation_aff (loop, use, cand, at, &aff))
3138 return NULL_TREE;
3139 unshare_aff_combination (&aff);
3140 return fold_convert (type, aff_combination_to_tree (&aff));
3143 /* Determines the expression by which USE is expressed from induction variable
3144 CAND in LOOP. The computation is unshared. */
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3149 return get_computation_at (loop, use, cand, use->stmt);
3152 /* Adjust the cost COST for being in loop setup rather than loop body.
3153 If we're optimizing for space, the loop setup overhead is constant;
3154 if we're optimizing for speed, amortize it over the per-iteration cost. */
3155 static unsigned
3156 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3158 if (cost == INFTY)
3159 return cost;
3160 else if (optimize_loop_for_speed_p (data->current_loop))
3161 return cost / avg_loop_niter (data->current_loop);
3162 else
3163 return cost;
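/* E.g. a setup cost of 10 in a loop with avg_loop_niter == 5 is charged
   as 2 when optimizing for speed, but stays 10 when optimizing for
   size.  */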
3166 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3167 validity for a memory reference accessing memory of mode MODE in
3168 address space AS. */
3171 bool
3172 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3173 addr_space_t as)
3175 #define MAX_RATIO 128
3176 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3177 static vec<sbitmap> valid_mult_list;
3178 sbitmap valid_mult;
3180 if (data_index >= valid_mult_list.length ())
3181 valid_mult_list.safe_grow_cleared (data_index + 1);
3183 valid_mult = valid_mult_list[data_index];
3184 if (!valid_mult)
3186 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3187 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3188 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3189 rtx addr, scaled;
3190 HOST_WIDE_INT i;
3192 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3193 bitmap_clear (valid_mult);
3194 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3195 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3196 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3199 if (memory_address_addr_space_p (mode, addr, as)
3200 || memory_address_addr_space_p (mode, scaled, as))
3201 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3206 fprintf (dump_file, " allowed multipliers:");
3207 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3209 fprintf (dump_file, " %d", (int) i);
3210 fprintf (dump_file, "\n");
3211 fprintf (dump_file, "\n");
3214 valid_mult_list[data_index] = valid_mult;
3217 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218 return false;
3220 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
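/* As a target-dependent illustration (not a guarantee): on typical x86
   targets only the index scales 1, 2, 4 and 8 are valid, so e.g.
   RATIO == 3 would be rejected there.  */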
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3225 variable is omitted. Compute the cost for a memory reference that accesses
3226 a memory location of mode MEM_MODE in address space AS.
3228 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229 size of MEM_MODE / RATIO) is available. To make this determination, we
3230 look at the size of the increment to be made, which is given in CSTEP.
3231 CSTEP may be zero if the step is unknown.
3232 STMT_AFTER_INC is true iff the statement we're looking at is after the
3233 increment of the original biv.
3235 TODO -- there must be some better way. This all is quite crude. */
3237 enum ainc_type
3239 AINC_PRE_INC, /* Pre increment. */
3240 AINC_PRE_DEC, /* Pre decrement. */
3241 AINC_POST_INC, /* Post increment. */
3242 AINC_POST_DEC, /* Post decrement. */
3243 AINC_NONE /* Also the number of auto increment types. */
3246 typedef struct address_cost_data_s
3248 HOST_WIDE_INT min_offset, max_offset;
3249 unsigned costs[2][2][2][2];
3250 unsigned ainc_costs[AINC_NONE];
3251 } *address_cost_data;
3254 static comp_cost
3255 get_address_cost (bool symbol_present, bool var_present,
3256 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3257 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3258 addr_space_t as, bool speed,
3259 bool stmt_after_inc, bool *may_autoinc)
3261 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3262 static vec<address_cost_data> address_cost_data_list;
3263 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3264 address_cost_data data;
3265 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3266 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3267 unsigned cost, acost, complexity;
3268 enum ainc_type autoinc_type;
3269 bool offset_p, ratio_p, autoinc;
3270 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3271 unsigned HOST_WIDE_INT mask;
3272 unsigned bits;
3274 if (data_index >= address_cost_data_list.length ())
3275 address_cost_data_list.safe_grow_cleared (data_index + 1);
3277 data = address_cost_data_list[data_index];
3278 if (!data)
3280 HOST_WIDE_INT i;
3281 HOST_WIDE_INT rat, off = 0;
3282 int old_cse_not_expected, width;
3283 unsigned sym_p, var_p, off_p, rat_p, add_c;
3284 rtx seq, addr, base;
3285 rtx reg0, reg1;
3287 data = (address_cost_data) xcalloc (1, sizeof (*data));
3289 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3291 width = GET_MODE_BITSIZE (address_mode) - 1;
3292 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3293 width = HOST_BITS_PER_WIDE_INT - 1;
3294 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3296 for (i = width; i >= 0; i--)
3298 off = -((unsigned HOST_WIDE_INT) 1 << i);
3299 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3300 if (memory_address_addr_space_p (mem_mode, addr, as))
3301 break;
3303 data->min_offset = (i == -1? 0 : off);
3305 for (i = width; i >= 0; i--)
3307 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3308 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3309 if (memory_address_addr_space_p (mem_mode, addr, as))
3310 break;
3312 if (i == -1)
3313 off = 0;
3314 data->max_offset = off;
3316 if (dump_file && (dump_flags & TDF_DETAILS))
3318 fprintf (dump_file, "get_address_cost:\n");
3319 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3320 GET_MODE_NAME (mem_mode),
3321 data->min_offset);
3322 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3323 GET_MODE_NAME (mem_mode),
3324 data->max_offset);
3327 rat = 1;
3328 for (i = 2; i <= MAX_RATIO; i++)
3329 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3331 rat = i;
3332 break;
3335 /* Compute the cost of various addressing modes. */
3336 acost = 0;
3337 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3338 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3340 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3341 || USE_STORE_PRE_DECREMENT (mem_mode))
3343 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3344 has_predec[mem_mode]
3345 = memory_address_addr_space_p (mem_mode, addr, as);
3347 if (has_predec[mem_mode])
3348 data->ainc_costs[AINC_PRE_DEC]
3349 = address_cost (addr, mem_mode, as, speed);
3351 if (USE_LOAD_POST_DECREMENT (mem_mode)
3352 || USE_STORE_POST_DECREMENT (mem_mode))
3354 addr = gen_rtx_POST_DEC (address_mode, reg0);
3355 has_postdec[mem_mode]
3356 = memory_address_addr_space_p (mem_mode, addr, as);
3358 if (has_postdec[mem_mode])
3359 data->ainc_costs[AINC_POST_DEC]
3360 = address_cost (addr, mem_mode, as, speed);
3362 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3363 || USE_STORE_PRE_INCREMENT (mem_mode))
3365 addr = gen_rtx_PRE_INC (address_mode, reg0);
3366 has_preinc[mem_mode]
3367 = memory_address_addr_space_p (mem_mode, addr, as);
3369 if (has_preinc[mem_mode])
3370 data->ainc_costs[AINC_PRE_INC]
3371 = address_cost (addr, mem_mode, as, speed);
3373 if (USE_LOAD_POST_INCREMENT (mem_mode)
3374 || USE_STORE_POST_INCREMENT (mem_mode))
3376 addr = gen_rtx_POST_INC (address_mode, reg0);
3377 has_postinc[mem_mode]
3378 = memory_address_addr_space_p (mem_mode, addr, as);
3380 if (has_postinc[mem_mode])
3381 data->ainc_costs[AINC_POST_INC]
3382 = address_cost (addr, mem_mode, as, speed);
3384 for (i = 0; i < 16; i++)
3386 sym_p = i & 1;
3387 var_p = (i >> 1) & 1;
3388 off_p = (i >> 2) & 1;
3389 rat_p = (i >> 3) & 1;
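/* The four bits of i enumerate all 16 combinations; e.g. i == 13
   (binary 1101) builds sym + cst + rat * reg0, with no separate
   var part.  */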
3391 addr = reg0;
3392 if (rat_p)
3393 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3394 gen_int_mode (rat, address_mode));
3396 if (var_p)
3397 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3399 if (sym_p)
3401 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3402 /* ??? We can run into trouble with some backends by presenting
3403 it with symbols which haven't been properly passed through
3404 targetm.encode_section_info. By setting the local bit, we
3405 enhance the probability of things working. */
3406 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3408 if (off_p)
3409 base = gen_rtx_fmt_e (CONST, address_mode,
3410 gen_rtx_fmt_ee
3411 (PLUS, address_mode, base,
3412 gen_int_mode (off, address_mode)));
3414 else if (off_p)
3415 base = gen_int_mode (off, address_mode);
3416 else
3417 base = NULL_RTX;
3419 if (base)
3420 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3422 start_sequence ();
3423 /* To avoid splitting addressing modes, pretend that no cse will
3424 follow. */
3425 old_cse_not_expected = cse_not_expected;
3426 cse_not_expected = true;
3427 addr = memory_address_addr_space (mem_mode, addr, as);
3428 cse_not_expected = old_cse_not_expected;
3429 seq = get_insns ();
3430 end_sequence ();
3432 acost = seq_cost (seq, speed);
3433 acost += address_cost (addr, mem_mode, as, speed);
3435 if (!acost)
3436 acost = 1;
3437 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3440 /* On some targets, it is quite expensive to load symbol to a register,
3441 which makes addresses that contain symbols look much more expensive.
3442 However, the symbol will have to be loaded in any case before the
3443 loop (and quite likely we have it in register already), so it does not
3444 make much sense to penalize them too heavily. So make some final
3445 tweaks for the SYMBOL_PRESENT modes:
3447 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3448 var is cheaper, use this mode with small penalty.
3449 If VAR_PRESENT is true, try whether the mode with
3450 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3451 if this is the case, use it. */
3452 add_c = add_cost (speed, address_mode);
3453 for (i = 0; i < 8; i++)
3455 var_p = i & 1;
3456 off_p = (i >> 1) & 1;
3457 rat_p = (i >> 2) & 1;
3459 acost = data->costs[0][1][off_p][rat_p] + 1;
3460 if (var_p)
3461 acost += add_c;
3463 if (acost < data->costs[1][var_p][off_p][rat_p])
3464 data->costs[1][var_p][off_p][rat_p] = acost;
3467 if (dump_file && (dump_flags & TDF_DETAILS))
3469 fprintf (dump_file, "Address costs:\n");
3471 for (i = 0; i < 16; i++)
3473 sym_p = i & 1;
3474 var_p = (i >> 1) & 1;
3475 off_p = (i >> 2) & 1;
3476 rat_p = (i >> 3) & 1;
3478 fprintf (dump_file, " ");
3479 if (sym_p)
3480 fprintf (dump_file, "sym + ");
3481 if (var_p)
3482 fprintf (dump_file, "var + ");
3483 if (off_p)
3484 fprintf (dump_file, "cst + ");
3485 if (rat_p)
3486 fprintf (dump_file, "rat * ");
3488 acost = data->costs[sym_p][var_p][off_p][rat_p];
3489 fprintf (dump_file, "index costs %d\n", acost);
3491 if (has_predec[mem_mode] || has_postdec[mem_mode]
3492 || has_preinc[mem_mode] || has_postinc[mem_mode])
3493 fprintf (dump_file, " May include autoinc/dec\n");
3494 fprintf (dump_file, "\n");
3497 address_cost_data_list[data_index] = data;
3500 bits = GET_MODE_BITSIZE (address_mode);
3501 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3502 offset &= mask;
3503 if ((offset >> (bits - 1) & 1))
3504 offset |= ~mask;
3505 s_offset = offset;
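/* The masking above sign-extends OFFSET from the width of the address
   mode; e.g. for a 32-bit address mode, an offset of 0xfffffff0 yields
   s_offset == -16.  */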
3507 autoinc = false;
3508 autoinc_type = AINC_NONE;
3509 msize = GET_MODE_SIZE (mem_mode);
3510 autoinc_offset = offset;
3511 if (stmt_after_inc)
3512 autoinc_offset += ratio * cstep;
3513 if (symbol_present || var_present || ratio != 1)
3514 autoinc = false;
3515 else
3517 if (has_postinc[mem_mode] && autoinc_offset == 0
3518 && msize == cstep)
3519 autoinc_type = AINC_POST_INC;
3520 else if (has_postdec[mem_mode] && autoinc_offset == 0
3521 && msize == -cstep)
3522 autoinc_type = AINC_POST_DEC;
3523 else if (has_preinc[mem_mode] && autoinc_offset == msize
3524 && msize == cstep)
3525 autoinc_type = AINC_PRE_INC;
3526 else if (has_predec[mem_mode] && autoinc_offset == -msize
3527 && msize == -cstep)
3528 autoinc_type = AINC_PRE_DEC;
3530 if (autoinc_type != AINC_NONE)
3531 autoinc = true;
3534 cost = 0;
3535 offset_p = (s_offset != 0
3536 && data->min_offset <= s_offset
3537 && s_offset <= data->max_offset);
3538 ratio_p = (ratio != 1
3539 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3541 if (ratio != 1 && !ratio_p)
3542 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3544 if (s_offset && !offset_p && !symbol_present)
3545 cost += add_cost (speed, address_mode);
3547 if (may_autoinc)
3548 *may_autoinc = autoinc;
3549 if (autoinc)
3550 acost = data->ainc_costs[autoinc_type];
3551 else
3552 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3553 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3554 return new_cost (cost + acost, complexity);
3557 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3558 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3559 calculating the operands of EXPR. Returns true if successful, and returns
3560 the cost in COST. */
3562 static bool
3563 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3564 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3566 comp_cost res;
3567 tree op1 = TREE_OPERAND (expr, 1);
3568 tree cst = TREE_OPERAND (mult, 1);
3569 tree multop = TREE_OPERAND (mult, 0);
3570 int m = exact_log2 (int_cst_value (cst));
3571 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3572 int sa_cost;
3573 bool equal_p = false;
3575 if (!(m >= 0 && m < maxm))
3576 return false;
3578 if (operand_equal_p (op1, mult, 0))
3579 equal_p = true;
3581 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3582 ? shiftadd_cost (speed, mode, m)
3583 : (equal_p
3584 ? shiftsub1_cost (speed, mode, m)
3585 : shiftsub0_cost (speed, mode, m)));
3586 res = new_cost (sa_cost, 0);
3587 res = add_costs (res, equal_p ? cost0 : cost1);
3589 STRIP_NOPS (multop);
3590 if (!is_gimple_val (multop))
3591 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3593 *cost = res;
3594 return true;
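/* A hypothetical example: for EXPR == a + b * 8, MULT is b * 8, so
   m == 3 and the shift-add cost shiftadd_cost (speed, mode, 3) is used,
   i.e. the cost of a single shift-and-add style instruction.  */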
3597 /* Estimates cost of forcing expression EXPR into a variable. */
3599 static comp_cost
3600 force_expr_to_var_cost (tree expr, bool speed)
3602 static bool costs_initialized = false;
3603 static unsigned integer_cost [2];
3604 static unsigned symbol_cost [2];
3605 static unsigned address_cost [2];
3606 tree op0, op1;
3607 comp_cost cost0, cost1, cost;
3608 enum machine_mode mode;
3610 if (!costs_initialized)
3612 tree type = build_pointer_type (integer_type_node);
3613 tree var, addr;
3614 rtx x;
3615 int i;
3617 var = create_tmp_var_raw (integer_type_node, "test_var");
3618 TREE_STATIC (var) = 1;
3619 x = produce_memory_decl_rtl (var, NULL);
3620 SET_DECL_RTL (var, x);
3622 addr = build1 (ADDR_EXPR, type, var);
3625 for (i = 0; i < 2; i++)
3627 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3628 2000), i);
3630 symbol_cost[i] = computation_cost (addr, i) + 1;
3632 address_cost[i]
3633 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3634 if (dump_file && (dump_flags & TDF_DETAILS))
3636 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3637 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3638 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3639 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3640 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3641 fprintf (dump_file, "\n");
3645 costs_initialized = true;
3648 STRIP_NOPS (expr);
3650 if (SSA_VAR_P (expr))
3651 return no_cost;
3653 if (is_gimple_min_invariant (expr))
3655 if (TREE_CODE (expr) == INTEGER_CST)
3656 return new_cost (integer_cost [speed], 0);
3658 if (TREE_CODE (expr) == ADDR_EXPR)
3660 tree obj = TREE_OPERAND (expr, 0);
3662 if (TREE_CODE (obj) == VAR_DECL
3663 || TREE_CODE (obj) == PARM_DECL
3664 || TREE_CODE (obj) == RESULT_DECL)
3665 return new_cost (symbol_cost [speed], 0);
3668 return new_cost (address_cost [speed], 0);
3671 switch (TREE_CODE (expr))
3673 case POINTER_PLUS_EXPR:
3674 case PLUS_EXPR:
3675 case MINUS_EXPR:
3676 case MULT_EXPR:
3677 op0 = TREE_OPERAND (expr, 0);
3678 op1 = TREE_OPERAND (expr, 1);
3679 STRIP_NOPS (op0);
3680 STRIP_NOPS (op1);
3681 break;
3683 CASE_CONVERT:
3684 case NEGATE_EXPR:
3685 op0 = TREE_OPERAND (expr, 0);
3686 STRIP_NOPS (op0);
3687 op1 = NULL_TREE;
3688 break;
3690 default:
3691 /* Just an arbitrary value, FIXME. */
3692 return new_cost (target_spill_cost[speed], 0);
3695 if (op0 == NULL_TREE
3696 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3697 cost0 = no_cost;
3698 else
3699 cost0 = force_expr_to_var_cost (op0, speed);
3701 if (op1 == NULL_TREE
3702 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3703 cost1 = no_cost;
3704 else
3705 cost1 = force_expr_to_var_cost (op1, speed);
3707 mode = TYPE_MODE (TREE_TYPE (expr));
3708 switch (TREE_CODE (expr))
3710 case POINTER_PLUS_EXPR:
3711 case PLUS_EXPR:
3712 case MINUS_EXPR:
3713 case NEGATE_EXPR:
3714 cost = new_cost (add_cost (speed, mode), 0);
3715 if (TREE_CODE (expr) != NEGATE_EXPR)
3717 tree mult = NULL_TREE;
3718 comp_cost sa_cost;
3719 if (TREE_CODE (op1) == MULT_EXPR)
3720 mult = op1;
3721 else if (TREE_CODE (op0) == MULT_EXPR)
3722 mult = op0;
3724 if (mult != NULL_TREE
3725 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3726 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3727 speed, &sa_cost))
3728 return sa_cost;
3730 break;
3732 CASE_CONVERT:
3734 tree inner_mode, outer_mode;
3735 outer_mode = TREE_TYPE (expr);
3736 inner_mode = TREE_TYPE (op0);
3737 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3738 TYPE_MODE (inner_mode), speed), 0);
3740 break;
3742 case MULT_EXPR:
3743 if (cst_and_fits_in_hwi (op0))
3744 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3745 mode, speed), 0);
3746 else if (cst_and_fits_in_hwi (op1))
3747 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3748 mode, speed), 0);
3749 else
3750 return new_cost (target_spill_cost [speed], 0);
3751 break;
3753 default:
3754 gcc_unreachable ();
3757 cost = add_costs (cost, cost0);
3758 cost = add_costs (cost, cost1);
3760 /* Bound the cost by target_spill_cost. The parts of complicated
3761 computations often are either loop invariant or at least can
3762 be shared between several iv uses, so letting this grow without
3763 limits would not give reasonable results. */
3764 if (cost.cost > (int) target_spill_cost [speed])
3765 cost.cost = target_spill_cost [speed];
3767 return cost;
3770 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3771 invariants the computation depends on. */
3773 static comp_cost
3774 force_var_cost (struct ivopts_data *data,
3775 tree expr, bitmap *depends_on)
3777 if (depends_on)
3779 fd_ivopts_data = data;
3780 walk_tree (&expr, find_depends, depends_on, NULL);
3783 return force_expr_to_var_cost (expr, data->speed);
3786 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3787 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3788 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3789 invariants the computation depends on. */
3791 static comp_cost
3792 split_address_cost (struct ivopts_data *data,
3793 tree addr, bool *symbol_present, bool *var_present,
3794 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3796 tree core;
3797 HOST_WIDE_INT bitsize;
3798 HOST_WIDE_INT bitpos;
3799 tree toffset;
3800 enum machine_mode mode;
3801 int unsignedp, volatilep;
3803 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3804 &unsignedp, &volatilep, false);
3806 if (toffset != 0
3807 || bitpos % BITS_PER_UNIT != 0
3808 || TREE_CODE (core) != VAR_DECL)
3810 *symbol_present = false;
3811 *var_present = true;
3812 fd_ivopts_data = data;
3813 walk_tree (&addr, find_depends, depends_on, NULL);
3814 return new_cost (target_spill_cost[data->speed], 0);
3817 *offset += bitpos / BITS_PER_UNIT;
3818 if (TREE_STATIC (core)
3819 || DECL_EXTERNAL (core))
3821 *symbol_present = true;
3822 *var_present = false;
3823 return no_cost;
3826 *symbol_present = false;
3827 *var_present = true;
3828 return no_cost;
3831 /* Estimates cost of expressing difference of addresses E1 - E2 as
3832 var + symbol + offset. The value of offset is added to OFFSET,
3833 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3834 part is missing. DEPENDS_ON is a set of the invariants the computation
3835 depends on. */
3837 static comp_cost
3838 ptr_difference_cost (struct ivopts_data *data,
3839 tree e1, tree e2, bool *symbol_present, bool *var_present,
3840 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3842 HOST_WIDE_INT diff = 0;
3843 aff_tree aff_e1, aff_e2;
3844 tree type;
3846 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3848 if (ptr_difference_const (e1, e2, &diff))
3850 *offset += diff;
3851 *symbol_present = false;
3852 *var_present = false;
3853 return no_cost;
3856 if (integer_zerop (e2))
3857 return split_address_cost (data, TREE_OPERAND (e1, 0),
3858 symbol_present, var_present, offset, depends_on);
3860 *symbol_present = false;
3861 *var_present = true;
3863 type = signed_type_for (TREE_TYPE (e1));
3864 tree_to_aff_combination (e1, type, &aff_e1);
3865 tree_to_aff_combination (e2, type, &aff_e2);
3866 aff_combination_scale (&aff_e2, -1);
3867 aff_combination_add (&aff_e1, &aff_e2);
3869 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3872 /* Estimates cost of expressing difference E1 - E2 as
3873 var + symbol + offset. The value of offset is added to OFFSET,
3874 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3875 part is missing. DEPENDS_ON is a set of the invariants the computation
3876 depends on. */
3878 static comp_cost
3879 difference_cost (struct ivopts_data *data,
3880 tree e1, tree e2, bool *symbol_present, bool *var_present,
3881 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3883 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3884 unsigned HOST_WIDE_INT off1, off2;
3885 aff_tree aff_e1, aff_e2;
3886 tree type;
3888 e1 = strip_offset (e1, &off1);
3889 e2 = strip_offset (e2, &off2);
3890 *offset += off1 - off2;
3892 STRIP_NOPS (e1);
3893 STRIP_NOPS (e2);
3895 if (TREE_CODE (e1) == ADDR_EXPR)
3896 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3897 offset, depends_on);
3898 *symbol_present = false;
3900 if (operand_equal_p (e1, e2, 0))
3902 *var_present = false;
3903 return no_cost;
3906 *var_present = true;
3908 if (integer_zerop (e2))
3909 return force_var_cost (data, e1, depends_on);
3911 if (integer_zerop (e1))
3913 comp_cost cost = force_var_cost (data, e2, depends_on);
3914 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3915 return cost;
3918 type = signed_type_for (TREE_TYPE (e1));
3919 tree_to_aff_combination (e1, type, &aff_e1);
3920 tree_to_aff_combination (e2, type, &aff_e2);
3921 aff_combination_scale (&aff_e2, -1);
3922 aff_combination_add (&aff_e1, &aff_e2);
3924 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
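/* E.g. for E1 == &a[4] and E2 == &a[0] the whole difference folds into
   *OFFSET and no_cost is returned, with both SYMBOL_PRESENT and
   VAR_PRESENT false.  */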
3927 /* Returns true if AFF1 and AFF2 are identical. */
3929 static bool
3930 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3932 unsigned i;
3934 if (aff1->n != aff2->n)
3935 return false;
3937 for (i = 0; i < aff1->n; i++)
3939 if (aff1->elts[i].coef != aff2->elts[i].coef)
3940 return false;
3942 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3943 return false;
3945 return true;
3948 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3950 static int
3951 get_expr_id (struct ivopts_data *data, tree expr)
3953 struct iv_inv_expr_ent ent;
3954 struct iv_inv_expr_ent **slot;
3956 ent.expr = expr;
3957 ent.hash = iterative_hash_expr (expr, 0);
3958 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3959 if (*slot)
3960 return (*slot)->id;
3962 *slot = XNEW (struct iv_inv_expr_ent);
3963 (*slot)->expr = expr;
3964 (*slot)->hash = ent.hash;
3965 (*slot)->id = data->inv_expr_id++;
3966 return (*slot)->id;
3969 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3970 requires a new compiler-generated temporary. Returns -1 otherwise.
3971 ADDRESS_P is a flag indicating if the expression is for address
3972 computation. */
3974 static int
3975 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3976 tree cbase, HOST_WIDE_INT ratio,
3977 bool address_p)
3979 aff_tree ubase_aff, cbase_aff;
3980 tree expr, ub, cb;
3982 STRIP_NOPS (ubase);
3983 STRIP_NOPS (cbase);
3984 ub = ubase;
3985 cb = cbase;
3987 if ((TREE_CODE (ubase) == INTEGER_CST)
3988 && (TREE_CODE (cbase) == INTEGER_CST))
3989 return -1;
3991 /* Strips the constant part. */
3992 if (TREE_CODE (ubase) == PLUS_EXPR
3993 || TREE_CODE (ubase) == MINUS_EXPR
3994 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3996 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3997 ubase = TREE_OPERAND (ubase, 0);
4000 /* Strips the constant part. */
4001 if (TREE_CODE (cbase) == PLUS_EXPR
4002 || TREE_CODE (cbase) == MINUS_EXPR
4003 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4005 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4006 cbase = TREE_OPERAND (cbase, 0);
4009 if (address_p)
4011 if (((TREE_CODE (ubase) == SSA_NAME)
4012 || (TREE_CODE (ubase) == ADDR_EXPR
4013 && is_gimple_min_invariant (ubase)))
4014 && (TREE_CODE (cbase) == INTEGER_CST))
4015 return -1;
4017 if (((TREE_CODE (cbase) == SSA_NAME)
4018 || (TREE_CODE (cbase) == ADDR_EXPR
4019 && is_gimple_min_invariant (cbase)))
4020 && (TREE_CODE (ubase) == INTEGER_CST))
4021 return -1;
4024 if (ratio == 1)
4026 if (operand_equal_p (ubase, cbase, 0))
4027 return -1;
4029 if (TREE_CODE (ubase) == ADDR_EXPR
4030 && TREE_CODE (cbase) == ADDR_EXPR)
4032 tree usym, csym;
4034 usym = TREE_OPERAND (ubase, 0);
4035 csym = TREE_OPERAND (cbase, 0);
4036 if (TREE_CODE (usym) == ARRAY_REF)
4038 tree ind = TREE_OPERAND (usym, 1);
4039 if (TREE_CODE (ind) == INTEGER_CST
4040 && tree_fits_shwi_p (ind)
4041 && tree_to_shwi (ind) == 0)
4042 usym = TREE_OPERAND (usym, 0);
4044 if (TREE_CODE (csym) == ARRAY_REF)
4046 tree ind = TREE_OPERAND (csym, 1);
4047 if (TREE_CODE (ind) == INTEGER_CST
4048 && tree_fits_shwi_p (ind)
4049 && tree_to_shwi (ind) == 0)
4050 csym = TREE_OPERAND (csym, 0);
4052 if (operand_equal_p (usym, csym, 0))
4053 return -1;
4055 /* Now do a more complex comparison. */
4056 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4057 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4058 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4059 return -1;
4062 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4063 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4065 aff_combination_scale (&cbase_aff, -1 * ratio);
4066 aff_combination_add (&ubase_aff, &cbase_aff);
4067 expr = aff_combination_to_tree (&ubase_aff);
4068 return get_expr_id (data, expr);
4073 /* Determines the cost of the computation by that USE is expressed
4074 from induction variable CAND. If ADDRESS_P is true, we just need
4075 to create an address from it, otherwise we want to get it into a
4076 register. A set of invariants we depend on is stored in
4077 DEPENDS_ON. AT is the statement at that the value is computed.
4078 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4079 addressing is likely. */
4081 static comp_cost
4082 get_computation_cost_at (struct ivopts_data *data,
4083 struct iv_use *use, struct iv_cand *cand,
4084 bool address_p, bitmap *depends_on, gimple at,
4085 bool *can_autoinc,
4086 int *inv_expr_id)
4088 tree ubase = use->iv->base, ustep = use->iv->step;
4089 tree cbase, cstep;
4090 tree utype = TREE_TYPE (ubase), ctype;
4091 unsigned HOST_WIDE_INT cstepi, offset = 0;
4092 HOST_WIDE_INT ratio, aratio;
4093 bool var_present, symbol_present, stmt_is_after_inc;
4094 comp_cost cost;
4095 widest_int rat;
4096 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4097 enum machine_mode mem_mode = (address_p
4098 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4099 : VOIDmode);
4101 *depends_on = NULL;
4103 /* Only consider real candidates. */
4104 if (!cand->iv)
4105 return infinite_cost;
4107 cbase = cand->iv->base;
4108 cstep = cand->iv->step;
4109 ctype = TREE_TYPE (cbase);
4111 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4113 /* We do not have a precision to express the values of use. */
4114 return infinite_cost;
4117 if (address_p
4118 || (use->iv->base_object
4119 && cand->iv->base_object
4120 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4121 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4123 /* Do not try to express the address of an object with a computation based
4124 on the address of a different object. This may cause problems in RTL-
4125 level alias analysis (which does not expect this to happen,
4126 as this is illegal in C), and would be unlikely to be useful
4127 anyway. */
4128 if (use->iv->base_object
4129 && cand->iv->base_object
4130 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4131 return infinite_cost;
4134 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4136 /* TODO -- add direct handling of this case. */
4137 goto fallback;
4140 /* CSTEPI is removed from the offset in case the statement is after the
4141 increment. If the step is not constant, we use zero instead.
4142 This is a bit imprecise (there is the extra addition), but
4143 redundancy elimination is likely to transform the code so that
4144 it uses the value of the variable before the increment anyway,
4145 so this is not that unrealistic. */
4146 if (cst_and_fits_in_hwi (cstep))
4147 cstepi = int_cst_value (cstep);
4148 else
4149 cstepi = 0;
4151 if (!constant_multiple_of (ustep, cstep, &rat))
4152 return infinite_cost;
4154 if (wi::fits_shwi_p (rat))
4155 ratio = rat.to_shwi ();
4156 else
4157 return infinite_cost;
4159 STRIP_NOPS (cbase);
4160 ctype = TREE_TYPE (cbase);
4162 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4164 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4165 or ratio == 1, it is better to handle this like
4167 ubase - ratio * cbase + ratio * var
4169 (this also holds in the case ratio == -1, TODO). */
4171 if (cst_and_fits_in_hwi (cbase))
4173 offset = - ratio * int_cst_value (cbase);
4174 cost = difference_cost (data,
4175 ubase, build_int_cst (utype, 0),
4176 &symbol_present, &var_present, &offset,
4177 depends_on);
4178 cost.cost /= avg_loop_niter (data->current_loop);
4180 else if (ratio == 1)
4182 tree real_cbase = cbase;
4184 /* Check to see if any adjustment is needed. */
4185 if (cstepi == 0 && stmt_is_after_inc)
4187 aff_tree real_cbase_aff;
4188 aff_tree cstep_aff;
4190 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4191 &real_cbase_aff);
4192 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4194 aff_combination_add (&real_cbase_aff, &cstep_aff);
4195 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4198 cost = difference_cost (data,
4199 ubase, real_cbase,
4200 &symbol_present, &var_present, &offset,
4201 depends_on);
4202 cost.cost /= avg_loop_niter (data->current_loop);
4204 else if (address_p
4205 && !POINTER_TYPE_P (ctype)
4206 && multiplier_allowed_in_address_p
4207 (ratio, mem_mode,
4208 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4210 cbase
4211 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4212 cost = difference_cost (data,
4213 ubase, cbase,
4214 &symbol_present, &var_present, &offset,
4215 depends_on);
4216 cost.cost /= avg_loop_niter (data->current_loop);
4218 else
4220 cost = force_var_cost (data, cbase, depends_on);
4221 cost = add_costs (cost,
4222 difference_cost (data,
4223 ubase, build_int_cst (utype, 0),
4224 &symbol_present, &var_present,
4225 &offset, depends_on));
4226 cost.cost /= avg_loop_niter (data->current_loop);
4227 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4230 if (inv_expr_id)
4232 *inv_expr_id =
4233 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4234 /* Clear depends on. */
4235 if (*inv_expr_id != -1 && depends_on && *depends_on)
4236 bitmap_clear (*depends_on);
4239 /* If we are after the increment, the value of the candidate is higher by
4240 one iteration. */
4241 if (stmt_is_after_inc)
4242 offset -= ratio * cstepi;
4244 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4245 (symbol/var1/const parts may be omitted). If we are looking for an
4246 address, find the cost of addressing this. */
4247 if (address_p)
4248 return add_costs (cost,
4249 get_address_cost (symbol_present, var_present,
4250 offset, ratio, cstepi,
4251 mem_mode,
4252 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4253 speed, stmt_is_after_inc,
4254 can_autoinc));
4256 /* Otherwise estimate the costs for computing the expression. */
4257 if (!symbol_present && !var_present && !offset)
4259 if (ratio != 1)
4260 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4261 return cost;
4264 /* Symbol + offset should be compile-time computable so consider that they
4265 are added once to the variable, if present. */
4266 if (var_present && (symbol_present || offset))
4267 cost.cost += adjust_setup_cost (data,
4268 add_cost (speed, TYPE_MODE (ctype)));
4270 /* Having an offset does not affect runtime cost in case it is added to a
4271 symbol, but it increases complexity. */
4272 if (offset)
4273 cost.complexity++;
4275 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4277 aratio = ratio > 0 ? ratio : -ratio;
4278 if (aratio != 1)
4279 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4280 return cost;
4282 fallback:
4283 if (can_autoinc)
4284 *can_autoinc = false;
4287 /* Just get the expression, expand it and measure the cost. */
4288 tree comp = get_computation_at (data->current_loop, use, cand, at);
4290 if (!comp)
4291 return infinite_cost;
4293 if (address_p)
4294 comp = build_simple_mem_ref (comp);
4296 return new_cost (computation_cost (comp, speed), 0);
4300 /* Determines the cost of the computation by which USE is expressed
4301 from induction variable CAND. If ADDRESS_P is true, we just need
4302 to create an address from it, otherwise we want to get it into a
4303 register. A set of invariants we depend on is stored in
4304 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4305 autoinc addressing is likely. */
4307 static comp_cost
4308 get_computation_cost (struct ivopts_data *data,
4309 struct iv_use *use, struct iv_cand *cand,
4310 bool address_p, bitmap *depends_on,
4311 bool *can_autoinc, int *inv_expr_id)
4313 return get_computation_cost_at (data,
4314 use, cand, address_p, depends_on, use->stmt,
4315 can_autoinc, inv_expr_id);
4318 /* Determines cost of basing replacement of USE on CAND in a generic
4319 expression. */
4321 static bool
4322 determine_use_iv_cost_generic (struct ivopts_data *data,
4323 struct iv_use *use, struct iv_cand *cand)
4325 bitmap depends_on;
4326 comp_cost cost;
4327 int inv_expr_id = -1;
4329 /* The simple case first -- if we need to express the value of the preserved
4330 original biv, the cost is 0. This also prevents us from counting the
4331 cost of increment twice -- once at this use and once in the cost of
4332 the candidate. */
4333 if (cand->pos == IP_ORIGINAL
4334 && cand->incremented_at == use->stmt)
4336 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4337 ERROR_MARK, -1);
4338 return true;
4341 cost = get_computation_cost (data, use, cand, false, &depends_on,
4342 NULL, &inv_expr_id);
4344 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4345 inv_expr_id);
4347 return !infinite_cost_p (cost);
4350 /* Determines cost of basing replacement of USE on CAND in an address. */
4352 static bool
4353 determine_use_iv_cost_address (struct ivopts_data *data,
4354 struct iv_use *use, struct iv_cand *cand)
4356 bitmap depends_on;
4357 bool can_autoinc;
4358 int inv_expr_id = -1;
4359 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4360 &can_autoinc, &inv_expr_id);
4362 if (cand->ainc_use == use)
4364 if (can_autoinc)
4365 cost.cost -= cand->cost_step;
4366 /* If we generated the candidate solely for exploiting autoincrement
4367 opportunities, and it turns out it can't be used, set the cost to
4368 infinity to make sure we ignore it. */
4369 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4370 cost = infinite_cost;
4372 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4373 inv_expr_id);
4375 return !infinite_cost_p (cost);
4378 /* Computes value of candidate CAND at position AT in iteration NITER, and
4379 stores it to VAL. */
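/* An illustrative example: for a candidate with base 10 and step 4,
   the value in iteration NITER == 5 is 10 + 5*4 == 30, or 34 when AT
   is after the increment. */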
4381 static void
4382 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4383 aff_tree *val)
4385 aff_tree step, delta, nit;
4386 struct iv *iv = cand->iv;
4387 tree type = TREE_TYPE (iv->base);
4388 tree steptype = type;
4389 if (POINTER_TYPE_P (type))
4390 steptype = sizetype;
4391 steptype = unsigned_type_for (type);
4393 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4394 aff_combination_convert (&step, steptype);
4395 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4396 aff_combination_convert (&nit, steptype);
4397 aff_combination_mult (&nit, &step, &delta);
4398 if (stmt_after_increment (loop, cand, at))
4399 aff_combination_add (&delta, &step);
4401 tree_to_aff_combination (iv->base, type, val);
4402 if (!POINTER_TYPE_P (type))
4403 aff_combination_convert (val, steptype);
4404 aff_combination_add (val, &delta);
4407 /* Returns period of induction variable iv. */
4409 static tree
4410 iv_period (struct iv *iv)
4412 tree step = iv->step, period, type;
4413 tree pow2div;
4415 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4417 type = unsigned_type_for (TREE_TYPE (step));
4418 /* Period of the iv is lcm (step, type_range)/step - 1,
4419 i.e., N*type_range/step - 1. Since the type range is a power
4420 of two, N == step >> num_of_ending_zeros_binary (step),
4421 so the final result is
4423 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
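/* An illustrative example: for a 32-bit unsigned iv with step 4,
   num_ending_zeros (step) == 2 and the period is
   (2^32 >> 2) - 1 == 0x3fffffff. */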
4426 pow2div = num_ending_zeros (step);
4428 period = build_low_bits_mask (type,
4429 (TYPE_PRECISION (type)
4430 - tree_to_uhwi (pow2div)));
4432 return period;
4435 /* Returns the comparison operator used when eliminating the iv USE. */
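/* An illustrative example: if the loop is left when the condition is
   true, the eliminated test exits on iv == BOUND (EQ_EXPR); if it is
   left on the false edge, the test becomes iv != BOUND (NE_EXPR). */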
4437 static enum tree_code
4438 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4440 struct loop *loop = data->current_loop;
4441 basic_block ex_bb;
4442 edge exit;
4444 ex_bb = gimple_bb (use->stmt);
4445 exit = EDGE_SUCC (ex_bb, 0);
4446 if (flow_bb_inside_loop_p (loop, exit->dest))
4447 exit = EDGE_SUCC (ex_bb, 1);
4449 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4452 static tree
4453 strip_wrap_conserving_type_conversions (tree exp)
4455 while (tree_ssa_useless_type_conversion (exp)
4456 && (nowrap_type_p (TREE_TYPE (exp))
4457 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4458 exp = TREE_OPERAND (exp, 0);
4459 return exp;
4462 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4463 check for an exact match. */
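/* An illustrative example: if E is the ssa name t_1 defined by
   t_1 = a_2 + 4 and WHAT is the expression a_2 + 4, the recursion
   below matches the definition operand by operand and returns true. */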
4465 static bool
4466 expr_equal_p (tree e, tree what)
4468 gimple stmt;
4469 enum tree_code code;
4471 e = strip_wrap_conserving_type_conversions (e);
4472 what = strip_wrap_conserving_type_conversions (what);
4474 code = TREE_CODE (what);
4475 if (TREE_TYPE (e) != TREE_TYPE (what))
4476 return false;
4478 if (operand_equal_p (e, what, 0))
4479 return true;
4481 if (TREE_CODE (e) != SSA_NAME)
4482 return false;
4484 stmt = SSA_NAME_DEF_STMT (e);
4485 if (gimple_code (stmt) != GIMPLE_ASSIGN
4486 || gimple_assign_rhs_code (stmt) != code)
4487 return false;
4489 switch (get_gimple_rhs_class (code))
4491 case GIMPLE_BINARY_RHS:
4492 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4493 return false;
4494 /* Fallthru. */
4496 case GIMPLE_UNARY_RHS:
4497 case GIMPLE_SINGLE_RHS:
4498 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4499 default:
4500 return false;
4504 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4505 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4506 calculation is performed in a non-wrapping type.
4508 TODO: More generally, we could test for the situation that
4509 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4510 This would require knowing the sign of OFFSET.
4512 Also, we only look for the first addition in the computation of BASE.
4513 More complex analysis would be better, but introducing it just for
4514 this optimization seems like overkill. */
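/* An illustrative example: if BASE is the ssa name defined by
   base = p + offset in a type with undefined overflow, then
   BASE - OFFSET just recovers p and therefore cannot overflow. */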
4516 static bool
4517 difference_cannot_overflow_p (tree base, tree offset)
4519 enum tree_code code;
4520 tree e1, e2;
4522 if (!nowrap_type_p (TREE_TYPE (base)))
4523 return false;
4525 base = expand_simple_operations (base);
4527 if (TREE_CODE (base) == SSA_NAME)
4529 gimple stmt = SSA_NAME_DEF_STMT (base);
4531 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4532 return false;
4534 code = gimple_assign_rhs_code (stmt);
4535 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4536 return false;
4538 e1 = gimple_assign_rhs1 (stmt);
4539 e2 = gimple_assign_rhs2 (stmt);
4541 else
4543 code = TREE_CODE (base);
4544 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4545 return false;
4546 e1 = TREE_OPERAND (base, 0);
4547 e2 = TREE_OPERAND (base, 1);
4550 /* TODO: deeper inspection may be necessary to prove the equality. */
4551 switch (code)
4553 case PLUS_EXPR:
4554 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4555 case POINTER_PLUS_EXPR:
4556 return expr_equal_p (e2, offset);
4558 default:
4559 return false;
4563 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4564 comparison with CAND. NITER describes the number of iterations of
4565 the loop. If successful, the comparison in COMP_P is altered accordingly.
4567 We aim to handle the following situation:
4569 sometype *base, *p;
4570 int a, b, i;
4572 i = a;
4573 p = p_0 = base + a;
4575 do
4576 {
4577 bla (*p);
4578 p++;
4579 i++;
4580 }
4581 while (i < b);
4583 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4584 We aim to optimize this to
4586 p = p_0 = base + a;
4587 do
4588 {
4589 bla (*p);
4590 p++;
4591 }
4592 while (p < p_0 - a + b);
4594 This preserves correctness, since the pointer arithmetic does not
4595 overflow. More precisely:
4597 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4598 overflow in computing it or the values of p.
4599 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4600 overflow. To prove this, we use the fact that p_0 = base + a. */
4602 static bool
4603 iv_elimination_compare_lt (struct ivopts_data *data,
4604 struct iv_cand *cand, enum tree_code *comp_p,
4605 struct tree_niter_desc *niter)
4607 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4608 struct aff_tree nit, tmpa, tmpb;
4609 enum tree_code comp;
4610 HOST_WIDE_INT step;
4612 /* We need to know that the candidate induction variable does not overflow.
4613 While more complex analysis may be used to prove this, for now just
4614 check that the variable appears in the original program and that it
4615 is computed in a type that guarantees no overflows. */
4616 cand_type = TREE_TYPE (cand->iv->base);
4617 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4618 return false;
4620 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4621 the calculation of the BOUND could overflow, making the comparison
4622 invalid. */
4623 if (!data->loop_single_exit_p)
4624 return false;
4626 /* We need to be able to decide whether candidate is increasing or decreasing
4627 in order to choose the right comparison operator. */
4628 if (!cst_and_fits_in_hwi (cand->iv->step))
4629 return false;
4630 step = int_cst_value (cand->iv->step);
4632 /* Check that the number of iterations matches the expected pattern:
4633 a + 1 > b ? 0 : b - a - 1. */
4634 mbz = niter->may_be_zero;
4635 if (TREE_CODE (mbz) == GT_EXPR)
4637 /* Handle a + 1 > b. */
4638 tree op0 = TREE_OPERAND (mbz, 0);
4639 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4641 a = TREE_OPERAND (op0, 0);
4642 b = TREE_OPERAND (mbz, 1);
4644 else
4645 return false;
4647 else if (TREE_CODE (mbz) == LT_EXPR)
4649 tree op1 = TREE_OPERAND (mbz, 1);
4651 /* Handle b < a + 1. */
4652 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4654 a = TREE_OPERAND (op1, 0);
4655 b = TREE_OPERAND (mbz, 0);
4657 else
4658 return false;
4660 else
4661 return false;
4663 /* Expected number of iterations is B - A - 1. Check that it matches
4664 the actual number, i.e., that B - A - NITER = 1. */
4665 tree_to_aff_combination (niter->niter, nit_type, &nit);
4666 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4667 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4668 aff_combination_scale (&nit, -1);
4669 aff_combination_scale (&tmpa, -1);
4670 aff_combination_add (&tmpb, &tmpa);
4671 aff_combination_add (&tmpb, &nit);
4672 if (tmpb.n != 0 || tmpb.offset != 1)
4673 return false;
4675 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4676 overflow. */
4677 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4678 cand->iv->step,
4679 fold_convert (TREE_TYPE (cand->iv->step), a));
4680 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4681 return false;
4683 /* Determine the new comparison operator. */
4684 comp = step < 0 ? GT_EXPR : LT_EXPR;
4685 if (*comp_p == NE_EXPR)
4686 *comp_p = comp;
4687 else if (*comp_p == EQ_EXPR)
4688 *comp_p = invert_tree_comparison (comp, false);
4689 else
4690 gcc_unreachable ();
4692 return true;
4695 /* Check whether it is possible to express the condition in USE by comparison
4696 of candidate CAND. If so, store the value compared with to BOUND, and the
4697 comparison operator to COMP. */
4699 static bool
4700 may_eliminate_iv (struct ivopts_data *data,
4701 struct iv_use *use, struct iv_cand *cand, tree *bound,
4702 enum tree_code *comp)
4704 basic_block ex_bb;
4705 edge exit;
4706 tree period;
4707 struct loop *loop = data->current_loop;
4708 aff_tree bnd;
4709 struct tree_niter_desc *desc = NULL;
4711 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4712 return false;
4714 /* For now this works only for exits that dominate the loop latch.
4715 TODO: extend to other conditions inside the loop body. */
4716 ex_bb = gimple_bb (use->stmt);
4717 if (use->stmt != last_stmt (ex_bb)
4718 || gimple_code (use->stmt) != GIMPLE_COND
4719 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4720 return false;
4722 exit = EDGE_SUCC (ex_bb, 0);
4723 if (flow_bb_inside_loop_p (loop, exit->dest))
4724 exit = EDGE_SUCC (ex_bb, 1);
4725 if (flow_bb_inside_loop_p (loop, exit->dest))
4726 return false;
4728 desc = niter_for_exit (data, exit);
4729 if (!desc)
4730 return false;
4732 /* Determine whether we can use the variable to test the exit condition.
4733 This is the case iff the period of the induction variable is greater
4734 than the number of iterations for which the exit condition is true. */
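/* An illustrative example: an 8-bit unsigned iv with step 1 has
   period 255, so it can be used to test an exit taken after at most
   255 iterations, but it would wrap around before an exit taken
   after 300 iterations. */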
4735 period = iv_period (cand->iv);
4737 /* If the number of iterations is constant, compare against it directly. */
4738 if (TREE_CODE (desc->niter) == INTEGER_CST)
4740 /* See cand_value_at. */
4741 if (stmt_after_increment (loop, cand, use->stmt))
4743 if (!tree_int_cst_lt (desc->niter, period))
4744 return false;
4746 else
4748 if (tree_int_cst_lt (period, desc->niter))
4749 return false;
4753 /* If not, and if this is the only possible exit of the loop, see whether
4754 we can get a conservative estimate on the number of iterations of the
4755 entire loop and compare against that instead. */
4756 else
4758 widest_int period_value, max_niter;
4760 max_niter = desc->max;
4761 if (stmt_after_increment (loop, cand, use->stmt))
4762 max_niter += 1;
4763 period_value = wi::to_widest (period);
4764 if (wi::gtu_p (max_niter, period_value))
4766 /* See if we can take advantage of inferred loop bound information. */
4767 if (data->loop_single_exit_p)
4769 if (!max_loop_iterations (loop, &max_niter))
4770 return false;
4771 /* The loop bound is already adjusted by adding 1. */
4772 if (wi::gtu_p (max_niter, period_value))
4773 return false;
4775 else
4776 return false;
4780 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4782 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4783 aff_combination_to_tree (&bnd));
4784 *comp = iv_elimination_compare (data, use);
4786 /* It is unlikely that computing the number of iterations using division
4787 would be more profitable than keeping the original induction variable. */
4788 if (expression_expensive_p (*bound))
4789 return false;
4791 /* Sometimes, it is possible to handle the situation that the number of
4792 iterations may be zero unless additional assumptions hold, by using <
4793 instead of != in the exit condition.
4795 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4796 base the exit condition on it. However, that is often too
4797 expensive. */
4798 if (!integer_zerop (desc->may_be_zero))
4799 return iv_elimination_compare_lt (data, cand, comp, desc);
4801 return true;
4804 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4805 be copied, if it is used in the loop body and DATA->body_includes_call. */
4807 static int
4808 parm_decl_cost (struct ivopts_data *data, tree bound)
4810 tree sbound = bound;
4811 STRIP_NOPS (sbound);
4813 if (TREE_CODE (sbound) == SSA_NAME
4814 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4815 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4816 && data->body_includes_call)
4817 return COSTS_N_INSNS (1);
4819 return 0;
4822 /* Determines cost of basing replacement of USE on CAND in a condition. */
4824 static bool
4825 determine_use_iv_cost_condition (struct ivopts_data *data,
4826 struct iv_use *use, struct iv_cand *cand)
4828 tree bound = NULL_TREE;
4829 struct iv *cmp_iv;
4830 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4831 comp_cost elim_cost, express_cost, cost, bound_cost;
4832 bool ok;
4833 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4834 tree *control_var, *bound_cst;
4835 enum tree_code comp = ERROR_MARK;
4837 /* Only consider real candidates. */
4838 if (!cand->iv)
4840 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4841 ERROR_MARK, -1);
4842 return false;
4845 /* Try iv elimination. */
4846 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4848 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4849 if (elim_cost.cost == 0)
4850 elim_cost.cost = parm_decl_cost (data, bound);
4851 else if (TREE_CODE (bound) == INTEGER_CST)
4852 elim_cost.cost = 0;
4853 /* If we replace a loop condition 'i < n' with 'p < base + n',
4854 depends_on_elim will have 'base' and 'n' set, which implies
4855 that both 'base' and 'n' will be live during the loop. More likely,
4856 'base + n' will be loop invariant, resulting in only one live value
4857 during the loop. So in that case we clear depends_on_elim and set
4858 elim_inv_expr_id instead. */
4859 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4861 elim_inv_expr_id = get_expr_id (data, bound);
4862 bitmap_clear (depends_on_elim);
4864 /* The bound is a loop invariant, so it will only be computed
4865 once. */
4866 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4868 else
4869 elim_cost = infinite_cost;
4871 /* Try expressing the original giv. If it is compared with an invariant,
4872 note that we cannot get rid of it. */
4873 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4874 NULL, &cmp_iv);
4875 gcc_assert (ok);
4877 /* When the condition is a comparison of the candidate IV against
4878 zero, prefer this IV.
4880 TODO: The constant that we're subtracting from the cost should
4881 be target-dependent. This information should be added to the
4882 target costs for each backend. */
4883 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4884 && integer_zerop (*bound_cst)
4885 && (operand_equal_p (*control_var, cand->var_after, 0)
4886 || operand_equal_p (*control_var, cand->var_before, 0)))
4887 elim_cost.cost -= 1;
4889 express_cost = get_computation_cost (data, use, cand, false,
4890 &depends_on_express, NULL,
4891 &express_inv_expr_id);
4892 fd_ivopts_data = data;
4893 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4895 /* Count the cost of the original bound as well. */
4896 bound_cost = force_var_cost (data, *bound_cst, NULL);
4897 if (bound_cost.cost == 0)
4898 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4899 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4900 bound_cost.cost = 0;
4901 express_cost.cost += bound_cost.cost;
4903 /* Choose the better approach, preferring the eliminated IV. */
4904 if (compare_costs (elim_cost, express_cost) <= 0)
4906 cost = elim_cost;
4907 depends_on = depends_on_elim;
4908 depends_on_elim = NULL;
4909 inv_expr_id = elim_inv_expr_id;
4911 else
4913 cost = express_cost;
4914 depends_on = depends_on_express;
4915 depends_on_express = NULL;
4916 bound = NULL_TREE;
4917 comp = ERROR_MARK;
4918 inv_expr_id = express_inv_expr_id;
4921 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4923 if (depends_on_elim)
4924 BITMAP_FREE (depends_on_elim);
4925 if (depends_on_express)
4926 BITMAP_FREE (depends_on_express);
4928 return !infinite_cost_p (cost);
4931 /* Determines cost of basing replacement of USE on CAND. Returns false
4932 if USE cannot be based on CAND. */
4934 static bool
4935 determine_use_iv_cost (struct ivopts_data *data,
4936 struct iv_use *use, struct iv_cand *cand)
4938 switch (use->type)
4940 case USE_NONLINEAR_EXPR:
4941 return determine_use_iv_cost_generic (data, use, cand);
4943 case USE_ADDRESS:
4944 return determine_use_iv_cost_address (data, use, cand);
4946 case USE_COMPARE:
4947 return determine_use_iv_cost_condition (data, use, cand);
4949 default:
4950 gcc_unreachable ();
4954 /* Return true if get_computation_cost indicates that autoincrement is
4955 a possibility for the pair of USE and CAND, false otherwise. */
4957 static bool
4958 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4959 struct iv_cand *cand)
4961 bitmap depends_on;
4962 bool can_autoinc;
4963 comp_cost cost;
4965 if (use->type != USE_ADDRESS)
4966 return false;
4968 cost = get_computation_cost (data, use, cand, true, &depends_on,
4969 &can_autoinc, NULL);
4971 BITMAP_FREE (depends_on);
4973 return !infinite_cost_p (cost) && can_autoinc;
4976 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4977 use that allows autoincrement, and set their AINC_USE if possible. */
4979 static void
4980 set_autoinc_for_original_candidates (struct ivopts_data *data)
4982 unsigned i, j;
4984 for (i = 0; i < n_iv_cands (data); i++)
4986 struct iv_cand *cand = iv_cand (data, i);
4987 struct iv_use *closest_before = NULL;
4988 struct iv_use *closest_after = NULL;
4989 if (cand->pos != IP_ORIGINAL)
4990 continue;
4992 for (j = 0; j < n_iv_uses (data); j++)
4994 struct iv_use *use = iv_use (data, j);
4995 unsigned uid = gimple_uid (use->stmt);
4997 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4998 continue;
5000 if (uid < gimple_uid (cand->incremented_at)
5001 && (closest_before == NULL
5002 || uid > gimple_uid (closest_before->stmt)))
5003 closest_before = use;
5005 if (uid > gimple_uid (cand->incremented_at)
5006 && (closest_after == NULL
5007 || uid < gimple_uid (closest_after->stmt)))
5008 closest_after = use;
5011 if (closest_before != NULL
5012 && autoinc_possible_for_pair (data, closest_before, cand))
5013 cand->ainc_use = closest_before;
5014 else if (closest_after != NULL
5015 && autoinc_possible_for_pair (data, closest_after, cand))
5016 cand->ainc_use = closest_after;
5020 /* Finds the candidates for the induction variables. */
5022 static void
5023 find_iv_candidates (struct ivopts_data *data)
5025 /* Add commonly used ivs. */
5026 add_standard_iv_candidates (data);
5028 /* Add old induction variables. */
5029 add_old_ivs_candidates (data);
5031 /* Add induction variables derived from uses. */
5032 add_derived_ivs_candidates (data);
5034 set_autoinc_for_original_candidates (data);
5036 /* Record the important candidates. */
5037 record_important_candidates (data);
5040 /* Determines costs of basing the use of the iv on an iv candidate. */
5042 static void
5043 determine_use_iv_costs (struct ivopts_data *data)
5045 unsigned i, j;
5046 struct iv_use *use;
5047 struct iv_cand *cand;
5048 bitmap to_clear = BITMAP_ALLOC (NULL);
5050 alloc_use_cost_map (data);
5052 for (i = 0; i < n_iv_uses (data); i++)
5054 use = iv_use (data, i);
5056 if (data->consider_all_candidates)
5058 for (j = 0; j < n_iv_cands (data); j++)
5060 cand = iv_cand (data, j);
5061 determine_use_iv_cost (data, use, cand);
5064 else
5066 bitmap_iterator bi;
5068 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5070 cand = iv_cand (data, j);
5071 if (!determine_use_iv_cost (data, use, cand))
5072 bitmap_set_bit (to_clear, j);
5075 /* Remove the candidates for which the cost is infinite from
5076 the list of related candidates. */
5077 bitmap_and_compl_into (use->related_cands, to_clear);
5078 bitmap_clear (to_clear);
5082 BITMAP_FREE (to_clear);
5084 if (dump_file && (dump_flags & TDF_DETAILS))
5086 fprintf (dump_file, "Use-candidate costs:\n");
5088 for (i = 0; i < n_iv_uses (data); i++)
5090 use = iv_use (data, i);
5092 fprintf (dump_file, "Use %d:\n", i);
5093 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5094 for (j = 0; j < use->n_map_members; j++)
5096 if (!use->cost_map[j].cand
5097 || infinite_cost_p (use->cost_map[j].cost))
5098 continue;
5100 fprintf (dump_file, " %d\t%d\t%d\t",
5101 use->cost_map[j].cand->id,
5102 use->cost_map[j].cost.cost,
5103 use->cost_map[j].cost.complexity);
5104 if (use->cost_map[j].depends_on)
5105 bitmap_print (dump_file,
5106 use->cost_map[j].depends_on, "","");
5107 if (use->cost_map[j].inv_expr_id != -1)
5108 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5109 fprintf (dump_file, "\n");
5112 fprintf (dump_file, "\n");
5114 fprintf (dump_file, "\n");
5118 /* Determines cost of the candidate CAND. */
5120 static void
5121 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5123 comp_cost cost_base;
5124 unsigned cost, cost_step;
5125 tree base;
5127 if (!cand->iv)
5129 cand->cost = 0;
5130 return;
5133 /* There are two costs associated with the candidate -- its increment
5134 and its initialization. The second is almost negligible for any loop
5135 that rolls enough, so we take it into account only a little. */
5137 base = cand->iv->base;
5138 cost_base = force_var_cost (data, base, NULL);
5139 /* Only exceptionally will the iv register happen to be initialized with
5140 the proper value at no cost. In general, there will at least be a regcopy
5141 or a const set. */
5142 if (cost_base.cost == 0)
5143 cost_base.cost = COSTS_N_INSNS (1);
5144 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5146 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5148 /* Prefer the original ivs unless we may gain something by replacing them.
5149 The reason is to make debugging simpler; hence this is not relevant for
5150 artificial ivs created by other optimization passes. */
5151 if (cand->pos != IP_ORIGINAL
5152 || !SSA_NAME_VAR (cand->var_before)
5153 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5154 cost++;
5156 /* Prefer not to insert statements into the latch unless there are some
5157 already (so that we do not create unnecessary jumps). */
5158 if (cand->pos == IP_END
5159 && empty_block_p (ip_end_pos (data->current_loop)))
5160 cost++;
5162 cand->cost = cost;
5163 cand->cost_step = cost_step;
5166 /* Determines costs of computation of the candidates. */
5168 static void
5169 determine_iv_costs (struct ivopts_data *data)
5171 unsigned i;
5173 if (dump_file && (dump_flags & TDF_DETAILS))
5175 fprintf (dump_file, "Candidate costs:\n");
5176 fprintf (dump_file, " cand\tcost\n");
5179 for (i = 0; i < n_iv_cands (data); i++)
5181 struct iv_cand *cand = iv_cand (data, i);
5183 determine_iv_cost (data, cand);
5185 if (dump_file && (dump_flags & TDF_DETAILS))
5186 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5189 if (dump_file && (dump_flags & TDF_DETAILS))
5190 fprintf (dump_file, "\n");
5193 /* Calculates cost for having SIZE induction variables. */
5195 static unsigned
5196 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5198 /* We add size to the cost, so that we prefer eliminating ivs
5199 if possible. */
5200 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5201 data->body_includes_call);
5204 /* For each size of the induction variable set determine the penalty. */
5206 static void
5207 determine_set_costs (struct ivopts_data *data)
5209 unsigned j, n;
5210 gimple phi;
5211 gimple_stmt_iterator psi;
5212 tree op;
5213 struct loop *loop = data->current_loop;
5214 bitmap_iterator bi;
5216 if (dump_file && (dump_flags & TDF_DETAILS))
5218 fprintf (dump_file, "Global costs:\n");
5219 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5220 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5221 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5222 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5225 n = 0;
5226 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5228 phi = gsi_stmt (psi);
5229 op = PHI_RESULT (phi);
5231 if (virtual_operand_p (op))
5232 continue;
5234 if (get_iv (data, op))
5235 continue;
5237 n++;
5240 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5242 struct version_info *info = ver_info (data, j);
5244 if (info->inv_id && info->has_nonlin_use)
5245 n++;
5248 data->regs_used = n;
5249 if (dump_file && (dump_flags & TDF_DETAILS))
5250 fprintf (dump_file, " regs_used %d\n", n);
5252 if (dump_file && (dump_flags & TDF_DETAILS))
5254 fprintf (dump_file, " cost for size:\n");
5255 fprintf (dump_file, " ivs\tcost\n");
5256 for (j = 0; j <= 2 * target_avail_regs; j++)
5257 fprintf (dump_file, " %d\t%d\n", j,
5258 ivopts_global_cost_for_size (data, j));
5259 fprintf (dump_file, "\n");
5263 /* Returns true if A is a cheaper cost pair than B. */
5265 static bool
5266 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5268 int cmp;
5270 if (!a)
5271 return false;
5273 if (!b)
5274 return true;
5276 cmp = compare_costs (a->cost, b->cost);
5277 if (cmp < 0)
5278 return true;
5280 if (cmp > 0)
5281 return false;
5283 /* In case the costs are the same, prefer the cheaper candidate. */
5284 if (a->cand->cost < b->cand->cost)
5285 return true;
5287 return false;
5291 /* Returns the candidate by which USE is expressed in IVS. */
5293 static struct cost_pair *
5294 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5296 return ivs->cand_for_use[use->id];
5299 /* Computes the cost field of IVS structure. */
5301 static void
5302 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5304 comp_cost cost = ivs->cand_use_cost;
5306 cost.cost += ivs->cand_cost;
5308 cost.cost += ivopts_global_cost_for_size (data,
5309 ivs->n_regs + ivs->num_used_inv_expr);
5311 ivs->cost = cost;
5314 /* Remove invariants in set INVS from set IVS. */
5316 static void
5317 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5319 bitmap_iterator bi;
5320 unsigned iid;
5322 if (!invs)
5323 return;
5325 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5327 ivs->n_invariant_uses[iid]--;
5328 if (ivs->n_invariant_uses[iid] == 0)
5329 ivs->n_regs--;
5333 /* Set USE not to be expressed by any candidate in IVS. */
5335 static void
5336 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5337 struct iv_use *use)
5339 unsigned uid = use->id, cid;
5340 struct cost_pair *cp;
5342 cp = ivs->cand_for_use[uid];
5343 if (!cp)
5344 return;
5345 cid = cp->cand->id;
5347 ivs->bad_uses++;
5348 ivs->cand_for_use[uid] = NULL;
5349 ivs->n_cand_uses[cid]--;
5351 if (ivs->n_cand_uses[cid] == 0)
5353 bitmap_clear_bit (ivs->cands, cid);
5354 /* Do not count the pseudocandidates. */
5355 if (cp->cand->iv)
5356 ivs->n_regs--;
5357 ivs->n_cands--;
5358 ivs->cand_cost -= cp->cand->cost;
5360 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5363 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5365 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5367 if (cp->inv_expr_id != -1)
5369 ivs->used_inv_expr[cp->inv_expr_id]--;
5370 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5371 ivs->num_used_inv_expr--;
5373 iv_ca_recount_cost (data, ivs);
5376 /* Add invariants in set INVS to set IVS. */
5378 static void
5379 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5381 bitmap_iterator bi;
5382 unsigned iid;
5384 if (!invs)
5385 return;
5387 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5389 ivs->n_invariant_uses[iid]++;
5390 if (ivs->n_invariant_uses[iid] == 1)
5391 ivs->n_regs++;
5395 /* Set cost pair for USE in set IVS to CP. */
5397 static void
5398 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5399 struct iv_use *use, struct cost_pair *cp)
5401 unsigned uid = use->id, cid;
5403 if (ivs->cand_for_use[uid] == cp)
5404 return;
5406 if (ivs->cand_for_use[uid])
5407 iv_ca_set_no_cp (data, ivs, use);
5409 if (cp)
5411 cid = cp->cand->id;
5413 ivs->bad_uses--;
5414 ivs->cand_for_use[uid] = cp;
5415 ivs->n_cand_uses[cid]++;
5416 if (ivs->n_cand_uses[cid] == 1)
5418 bitmap_set_bit (ivs->cands, cid);
5419 /* Do not count the pseudocandidates. */
5420 if (cp->cand->iv)
5421 ivs->n_regs++;
5422 ivs->n_cands++;
5423 ivs->cand_cost += cp->cand->cost;
5425 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5428 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5429 iv_ca_set_add_invariants (ivs, cp->depends_on);
5431 if (cp->inv_expr_id != -1)
5433 ivs->used_inv_expr[cp->inv_expr_id]++;
5434 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5435 ivs->num_used_inv_expr++;
5437 iv_ca_recount_cost (data, ivs);
5441 /* Extend set IVS by expressing USE by some of the candidates in it
5442 if possible. All important candidates will be considered
5443 if IMPORTANT_CANDIDATES is true. */
5445 static void
5446 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5447 struct iv_use *use, bool important_candidates)
5449 struct cost_pair *best_cp = NULL, *cp;
5450 bitmap_iterator bi;
5451 bitmap cands;
5452 unsigned i;
5454 gcc_assert (ivs->upto >= use->id);
5456 if (ivs->upto == use->id)
5458 ivs->upto++;
5459 ivs->bad_uses++;
5462 cands = (important_candidates ? data->important_candidates : ivs->cands);
5463 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5465 struct iv_cand *cand = iv_cand (data, i);
5467 cp = get_use_iv_cost (data, use, cand);
5469 if (cheaper_cost_pair (cp, best_cp))
5470 best_cp = cp;
5473 iv_ca_set_cp (data, ivs, use, best_cp);
5476 /* Get cost for assignment IVS. */
5478 static comp_cost
5479 iv_ca_cost (struct iv_ca *ivs)
5481 /* This was a conditional expression but it triggered a bug in
5482 Sun C 5.5. */
5483 if (ivs->bad_uses)
5484 return infinite_cost;
5485 else
5486 return ivs->cost;
5489 /* Returns true if all dependences of CP are among invariants in IVS. */
5491 static bool
5492 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5494 unsigned i;
5495 bitmap_iterator bi;
5497 if (!cp->depends_on)
5498 return true;
5500 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5502 if (ivs->n_invariant_uses[i] == 0)
5503 return false;
5506 return true;
5509 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5510 it before NEXT_CHANGE. */
5512 static struct iv_ca_delta *
5513 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5514 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5516 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5518 change->use = use;
5519 change->old_cp = old_cp;
5520 change->new_cp = new_cp;
5521 change->next_change = next_change;
5523 return change;
5526 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5527 are rewritten. */
5529 static struct iv_ca_delta *
5530 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5532 struct iv_ca_delta *last;
5534 if (!l2)
5535 return l1;
5537 if (!l1)
5538 return l2;
5540 for (last = l1; last->next_change; last = last->next_change)
5541 continue;
5542 last->next_change = l2;
5544 return l1;
5547 /* Reverse the list of changes DELTA, forming the inverse to it. */
5549 static struct iv_ca_delta *
5550 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5552 struct iv_ca_delta *act, *next, *prev = NULL;
5553 struct cost_pair *tmp;
5555 for (act = delta; act; act = next)
5557 next = act->next_change;
5558 act->next_change = prev;
5559 prev = act;
5561 tmp = act->old_cp;
5562 act->old_cp = act->new_cp;
5563 act->new_cp = tmp;
5566 return prev;
5569 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5570 reverted instead. */
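/* An illustrative example: committing a delta [USE0: cpA -> cpB]
   with FORWARD true switches USE0 to cpB; committing the same delta
   with FORWARD false restores cpA. */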
5572 static void
5573 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5574 struct iv_ca_delta *delta, bool forward)
5576 struct cost_pair *from, *to;
5577 struct iv_ca_delta *act;
5579 if (!forward)
5580 delta = iv_ca_delta_reverse (delta);
5582 for (act = delta; act; act = act->next_change)
5584 from = act->old_cp;
5585 to = act->new_cp;
5586 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5587 iv_ca_set_cp (data, ivs, act->use, to);
5590 if (!forward)
5591 iv_ca_delta_reverse (delta);
5594 /* Returns true if CAND is used in IVS. */
5596 static bool
5597 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5599 return ivs->n_cand_uses[cand->id] > 0;
5602 /* Returns number of induction variable candidates in the set IVS. */
5604 static unsigned
5605 iv_ca_n_cands (struct iv_ca *ivs)
5607 return ivs->n_cands;
5610 /* Free the list of changes DELTA. */
5612 static void
5613 iv_ca_delta_free (struct iv_ca_delta **delta)
5615 struct iv_ca_delta *act, *next;
5617 for (act = *delta; act; act = next)
5619 next = act->next_change;
5620 free (act);
5623 *delta = NULL;
5626 /* Allocates a new iv candidate assignment. */
5628 static struct iv_ca *
5629 iv_ca_new (struct ivopts_data *data)
5631 struct iv_ca *nw = XNEW (struct iv_ca);
5633 nw->upto = 0;
5634 nw->bad_uses = 0;
5635 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5636 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5637 nw->cands = BITMAP_ALLOC (NULL);
5638 nw->n_cands = 0;
5639 nw->n_regs = 0;
5640 nw->cand_use_cost = no_cost;
5641 nw->cand_cost = 0;
5642 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5643 nw->cost = no_cost;
5644 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5645 nw->num_used_inv_expr = 0;
5647 return nw;
5650 /* Free memory occupied by the set IVS. */
5652 static void
5653 iv_ca_free (struct iv_ca **ivs)
5655 free ((*ivs)->cand_for_use);
5656 free ((*ivs)->n_cand_uses);
5657 BITMAP_FREE ((*ivs)->cands);
5658 free ((*ivs)->n_invariant_uses);
5659 free ((*ivs)->used_inv_expr);
5660 free (*ivs);
5661 *ivs = NULL;
5664 /* Dumps IVS to FILE. */
5666 static void
5667 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5669 const char *pref = " invariants ";
5670 unsigned i;
5671 comp_cost cost = iv_ca_cost (ivs);
5673 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5674 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5675 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5676 bitmap_print (file, ivs->cands, " candidates: ","\n");
5678 for (i = 0; i < ivs->upto; i++)
5680 struct iv_use *use = iv_use (data, i);
5681 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5682 if (cp)
5683 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5684 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5685 else
5686 fprintf (file, " use:%d --> ??\n", use->id);
5689 for (i = 1; i <= data->max_inv_id; i++)
5690 if (ivs->n_invariant_uses[i])
5692 fprintf (file, "%s%d", pref, i);
5693 pref = ", ";
5695 fprintf (file, "\n\n");
5698 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5699 the new set, and store the differences in DELTA. The number of induction
5700 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when true,
5701 the function tries to find a solution with a minimal number of iv candidates. */
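/* An illustrative example: extending {c1} with c2 retargets every use
   for which c2 has a cheaper cost pair, records those changes in
   DELTA, and returns the cost the set would have -- IVS itself is left
   unchanged, since the delta is reverted before returning. */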
5703 static comp_cost
5704 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5705 struct iv_cand *cand, struct iv_ca_delta **delta,
5706 unsigned *n_ivs, bool min_ncand)
5708 unsigned i;
5709 comp_cost cost;
5710 struct iv_use *use;
5711 struct cost_pair *old_cp, *new_cp;
5713 *delta = NULL;
5714 for (i = 0; i < ivs->upto; i++)
5716 use = iv_use (data, i);
5717 old_cp = iv_ca_cand_for_use (ivs, use);
5719 if (old_cp
5720 && old_cp->cand == cand)
5721 continue;
5723 new_cp = get_use_iv_cost (data, use, cand);
5724 if (!new_cp)
5725 continue;
5727 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5728 continue;
5730 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5731 continue;
5733 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5736 iv_ca_delta_commit (data, ivs, *delta, true);
5737 cost = iv_ca_cost (ivs);
5738 if (n_ivs)
5739 *n_ivs = iv_ca_n_cands (ivs);
5740 iv_ca_delta_commit (data, ivs, *delta, false);
5742 return cost;
5745 /* Try narrowing set IVS by removing CAND. Return the cost of
5746 the new set and store the differences in DELTA. START is
5747 the candidate with which we start narrowing. */
5749 static comp_cost
5750 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5751 struct iv_cand *cand, struct iv_cand *start,
5752 struct iv_ca_delta **delta)
5754 unsigned i, ci;
5755 struct iv_use *use;
5756 struct cost_pair *old_cp, *new_cp, *cp;
5757 bitmap_iterator bi;
5758 struct iv_cand *cnd;
5759 comp_cost cost, best_cost, acost;
5761 *delta = NULL;
5762 for (i = 0; i < n_iv_uses (data); i++)
5764 use = iv_use (data, i);
5766 old_cp = iv_ca_cand_for_use (ivs, use);
5767 if (old_cp->cand != cand)
5768 continue;
5770 best_cost = iv_ca_cost (ivs);
5771 /* Start narrowing with START. */
5772 new_cp = get_use_iv_cost (data, use, start);
5774 if (data->consider_all_candidates)
5776 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5778 if (ci == cand->id || (start && ci == start->id))
5779 continue;
5781 cnd = iv_cand (data, ci);
5783 cp = get_use_iv_cost (data, use, cnd);
5784 if (!cp)
5785 continue;
5787 iv_ca_set_cp (data, ivs, use, cp);
5788 acost = iv_ca_cost (ivs);
5790 if (compare_costs (acost, best_cost) < 0)
5792 best_cost = acost;
5793 new_cp = cp;
5797 else
5799 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5801 if (ci == cand->id || (start && ci == start->id))
5802 continue;
5804 cnd = iv_cand (data, ci);
5806 cp = get_use_iv_cost (data, use, cnd);
5807 if (!cp)
5808 continue;
5810 iv_ca_set_cp (data, ivs, use, cp);
5811 acost = iv_ca_cost (ivs);
5813 if (compare_costs (acost, best_cost) < 0)
5815 best_cost = acost;
5816 new_cp = cp;
5820 /* Restore to old cp for use. */
5821 iv_ca_set_cp (data, ivs, use, old_cp);
5823 if (!new_cp)
5825 iv_ca_delta_free (delta);
5826 return infinite_cost;
5829 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5832 iv_ca_delta_commit (data, ivs, *delta, true);
5833 cost = iv_ca_cost (ivs);
5834 iv_ca_delta_commit (data, ivs, *delta, false);
5836 return cost;
5839 /* Try optimizing the set of candidates IVS by removing candidates other
5840 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5841 differences in DELTA. */
5843 static comp_cost
5844 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5845 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5847 bitmap_iterator bi;
5848 struct iv_ca_delta *act_delta, *best_delta;
5849 unsigned i;
5850 comp_cost best_cost, acost;
5851 struct iv_cand *cand;
5853 best_delta = NULL;
5854 best_cost = iv_ca_cost (ivs);
5856 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5858 cand = iv_cand (data, i);
5860 if (cand == except_cand)
5861 continue;
5863 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5865 if (compare_costs (acost, best_cost) < 0)
5867 best_cost = acost;
5868 iv_ca_delta_free (&best_delta);
5869 best_delta = act_delta;
5871 else
5872 iv_ca_delta_free (&act_delta);
5875 if (!best_delta)
5877 *delta = NULL;
5878 return best_cost;
5881 /* Recurse to possibly remove other unnecessary ivs. */
5882 iv_ca_delta_commit (data, ivs, best_delta, true);
5883 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5884 iv_ca_delta_commit (data, ivs, best_delta, false);
5885 *delta = iv_ca_delta_join (best_delta, *delta);
5886 return best_cost;
5889 /* Tries to extend the set IVS in the best possible way in order
5890 to express the USE. If ORIGINALP is true, prefer candidates from
5891 the original set of IVs, otherwise favor important candidates not
5892 based on any memory object. */
5894 static bool
5895 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5896 struct iv_use *use, bool originalp)
5898 comp_cost best_cost, act_cost;
5899 unsigned i;
5900 bitmap_iterator bi;
5901 struct iv_cand *cand;
5902 struct iv_ca_delta *best_delta = NULL, *act_delta;
5903 struct cost_pair *cp;
5905 iv_ca_add_use (data, ivs, use, false);
5906 best_cost = iv_ca_cost (ivs);
5908 cp = iv_ca_cand_for_use (ivs, use);
5909 if (!cp)
5911 ivs->upto--;
5912 ivs->bad_uses--;
5913 iv_ca_add_use (data, ivs, use, true);
5914 best_cost = iv_ca_cost (ivs);
5915 cp = iv_ca_cand_for_use (ivs, use);
5917 if (cp)
5919 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5920 iv_ca_set_no_cp (data, ivs, use);
5923 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5924 first try important candidates not based on any memory object. Only if
5925 this fails, try the specific ones. Rationale -- in loops with many
5926 variables the best choice often is to use just one generic biv. If we
5927 added many ivs specific to the uses here, the optimization algorithm later
5928 would be likely to get stuck in a local minimum, thus causing us to create
5929 too many ivs. The approach from few ivs to more seems more likely to be
5930 successful -- starting from few ivs, replacing an expensive use by a
5931 specific iv should always be a win. */
5932 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5934 cand = iv_cand (data, i);
5936 if (originalp && cand->pos != IP_ORIGINAL)
5937 continue;
5939 if (!originalp && cand->iv->base_object != NULL_TREE)
5940 continue;
5942 if (iv_ca_cand_used_p (ivs, cand))
5943 continue;
5945 cp = get_use_iv_cost (data, use, cand);
5946 if (!cp)
5947 continue;
5949 iv_ca_set_cp (data, ivs, use, cp);
5950 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5951 true);
5952 iv_ca_set_no_cp (data, ivs, use);
5953 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5955 if (compare_costs (act_cost, best_cost) < 0)
5957 best_cost = act_cost;
5959 iv_ca_delta_free (&best_delta);
5960 best_delta = act_delta;
5962 else
5963 iv_ca_delta_free (&act_delta);
5966 if (infinite_cost_p (best_cost))
5968 for (i = 0; i < use->n_map_members; i++)
5970 cp = use->cost_map + i;
5971 cand = cp->cand;
5972 if (!cand)
5973 continue;
5975 /* Already tried this. */
5976 if (cand->important)
5978 if (originalp && cand->pos == IP_ORIGINAL)
5979 continue;
5980 if (!originalp && cand->iv->base_object == NULL_TREE)
5981 continue;
5984 if (iv_ca_cand_used_p (ivs, cand))
5985 continue;
5987 act_delta = NULL;
5988 iv_ca_set_cp (data, ivs, use, cp);
5989 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5990 iv_ca_set_no_cp (data, ivs, use);
5991 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5992 cp, act_delta);
5994 if (compare_costs (act_cost, best_cost) < 0)
5996 best_cost = act_cost;
5998 if (best_delta)
5999 iv_ca_delta_free (&best_delta);
6000 best_delta = act_delta;
6002 else
6003 iv_ca_delta_free (&act_delta);
6007 iv_ca_delta_commit (data, ivs, best_delta, true);
6008 iv_ca_delta_free (&best_delta);
6010 return !infinite_cost_p (best_cost);
6013 /* Finds an initial assignment of candidates to uses. */
6015 static struct iv_ca *
6016 get_initial_solution (struct ivopts_data *data, bool originalp)
6018 struct iv_ca *ivs = iv_ca_new (data);
6019 unsigned i;
6021 for (i = 0; i < n_iv_uses (data); i++)
6022 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6024 iv_ca_free (&ivs);
6025 return NULL;
6028 return ivs;
6031 /* Tries to improve set of induction variables IVS. */
6033 static bool
6034 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6036 unsigned i, n_ivs;
6037 comp_cost acost, best_cost = iv_ca_cost (ivs);
6038 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6039 struct iv_cand *cand;
6041 /* Try extending the set of induction variables by one. */
6042 for (i = 0; i < n_iv_cands (data); i++)
6044 cand = iv_cand (data, i);
6046 if (iv_ca_cand_used_p (ivs, cand))
6047 continue;
6049 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6050 if (!act_delta)
6051 continue;
6053 /* If we successfully added the candidate and the set is small enough,
6054 try optimizing it by removing other candidates. */
6055 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6057 iv_ca_delta_commit (data, ivs, act_delta, true);
6058 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6059 iv_ca_delta_commit (data, ivs, act_delta, false);
6060 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6063 if (compare_costs (acost, best_cost) < 0)
6065 best_cost = acost;
6066 iv_ca_delta_free (&best_delta);
6067 best_delta = act_delta;
6069 else
6070 iv_ca_delta_free (&act_delta);
6073 if (!best_delta)
6075 /* Try removing the candidates from the set instead. */
6076 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6078 /* Nothing more we can do. */
6079 if (!best_delta)
6080 return false;
6083 iv_ca_delta_commit (data, ivs, best_delta, true);
6084 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6085 iv_ca_delta_free (&best_delta);
6086 return true;
6089 /* Attempts to find the optimal set of induction variables. We use a simple
6090 greedy heuristic -- we try to replace at most one candidate in the selected
6091 solution and remove the unused ivs while this improves the cost. */
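/* An illustrative example: starting from {c1, c2}, one improvement
   step may extend the set with c3 and then prune c1 if {c2, c3} is
   cheaper than {c1, c2}; the iteration below stops as soon as no
   single change lowers the cost. */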
6093 static struct iv_ca *
6094 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6096 struct iv_ca *set;
6098 /* Get the initial solution. */
6099 set = get_initial_solution (data, originalp);
6100 if (!set)
6102 if (dump_file && (dump_flags & TDF_DETAILS))
6103 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6104 return NULL;
6107 if (dump_file && (dump_flags & TDF_DETAILS))
6109 fprintf (dump_file, "Initial set of candidates:\n");
6110 iv_ca_dump (data, dump_file, set);
6113 while (try_improve_iv_set (data, set))
6115 if (dump_file && (dump_flags & TDF_DETAILS))
6117 fprintf (dump_file, "Improved to:\n");
6118 iv_ca_dump (data, dump_file, set);
6122 return set;
6125 static struct iv_ca *
6126 find_optimal_iv_set (struct ivopts_data *data)
6128 unsigned i;
6129 struct iv_ca *set, *origset;
6130 struct iv_use *use;
6131 comp_cost cost, origcost;
6133 /* Determine the cost based on a strategy that starts with the original IVs,
6134 and then try again using a strategy that prefers candidates not based
6135 on any IVs. */
6136 origset = find_optimal_iv_set_1 (data, true);
6137 set = find_optimal_iv_set_1 (data, false);
6139 if (!origset && !set)
6140 return NULL;
6142 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6143 cost = set ? iv_ca_cost (set) : infinite_cost;
6145 if (dump_file && (dump_flags & TDF_DETAILS))
6147 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6148 origcost.cost, origcost.complexity);
6149 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6150 cost.cost, cost.complexity);
6153 /* Choose the one with the best cost. */
6154 if (compare_costs (origcost, cost) <= 0)
6156 if (set)
6157 iv_ca_free (&set);
6158 set = origset;
6160 else if (origset)
6161 iv_ca_free (&origset);
6163 for (i = 0; i < n_iv_uses (data); i++)
6165 use = iv_use (data, i);
6166 use->selected = iv_ca_cand_for_use (set, use)->cand;
6169 return set;
6172 /* Creates a new induction variable corresponding to CAND. */
6174 static void
6175 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6177 gimple_stmt_iterator incr_pos;
6178 tree base;
6179 bool after = false;
6181 if (!cand->iv)
6182 return;
6184 switch (cand->pos)
6186 case IP_NORMAL:
6187 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6188 break;
6190 case IP_END:
6191 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6192 after = true;
6193 break;
6195 case IP_AFTER_USE:
6196 after = true;
6197 /* fall through */
6198 case IP_BEFORE_USE:
6199 incr_pos = gsi_for_stmt (cand->incremented_at);
6200 break;
6202 case IP_ORIGINAL:
6203 /* Mark that the iv is preserved. */
6204 name_info (data, cand->var_before)->preserve_biv = true;
6205 name_info (data, cand->var_after)->preserve_biv = true;
6207 /* Rewrite the increment so that it uses var_before directly. */
6208 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6209 return;
6212 gimple_add_tmp_var (cand->var_before);
6214 base = unshare_expr (cand->iv->base);
6216 create_iv (base, unshare_expr (cand->iv->step),
6217 cand->var_before, data->current_loop,
6218 &incr_pos, after, &cand->var_before, &cand->var_after);
6221 /* Creates new induction variables described in SET. */
6223 static void
6224 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6226 unsigned i;
6227 struct iv_cand *cand;
6228 bitmap_iterator bi;
6230 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6232 cand = iv_cand (data, i);
6233 create_new_iv (data, cand);
6236 if (dump_file && (dump_flags & TDF_DETAILS))
6238 fprintf (dump_file, "\nSelected IV set: \n");
6239 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6241 cand = iv_cand (data, i);
6242 dump_cand (dump_file, cand);
6244 fprintf (dump_file, "\n");
6248 /* Rewrites USE (definition of iv used in a nonlinear expression)
6249 using candidate CAND. */
6251 static void
6252 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6253 struct iv_use *use, struct iv_cand *cand)
6255 tree comp;
6256 tree op, tgt;
6257 gimple ass;
6258 gimple_stmt_iterator bsi;
6260 /* An important special case -- if we are asked to express the value of
6261 the original iv by itself, just exit; there is no need to
6262 introduce a new computation (that might also need casting the
6263 variable to unsigned and back). */
6264 if (cand->pos == IP_ORIGINAL
6265 && cand->incremented_at == use->stmt)
6267 enum tree_code stmt_code;
6269 gcc_assert (is_gimple_assign (use->stmt));
6270 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6272 /* Check whether we may leave the computation unchanged.
6273 This is the case only if it does not rely on other
6274 computations in the loop -- otherwise, the computation
6275 we rely upon may be removed in remove_unused_ivs,
6276 thus leading to an ICE. */
6277 stmt_code = gimple_assign_rhs_code (use->stmt);
6278 if (stmt_code == PLUS_EXPR
6279 || stmt_code == MINUS_EXPR
6280 || stmt_code == POINTER_PLUS_EXPR)
6282 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6283 op = gimple_assign_rhs2 (use->stmt);
6284 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6285 op = gimple_assign_rhs1 (use->stmt);
6286 else
6287 op = NULL_TREE;
6289 else
6290 op = NULL_TREE;
6292 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6293 return;
6296 comp = get_computation (data->current_loop, use, cand);
6297 gcc_assert (comp != NULL_TREE);
6299 switch (gimple_code (use->stmt))
6301 case GIMPLE_PHI:
6302 tgt = PHI_RESULT (use->stmt);
6304 /* If we should keep the biv, do not replace it. */
6305 if (name_info (data, tgt)->preserve_biv)
6306 return;
6308 bsi = gsi_after_labels (gimple_bb (use->stmt));
6309 break;
6311 case GIMPLE_ASSIGN:
6312 tgt = gimple_assign_lhs (use->stmt);
6313 bsi = gsi_for_stmt (use->stmt);
6314 break;
6316 default:
6317 gcc_unreachable ();
6320 if (!valid_gimple_rhs_p (comp)
6321 || (gimple_code (use->stmt) != GIMPLE_PHI
6322 /* We can't allow re-allocating the stmt as it might be pointed
6323 to still. */
6324 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6325 >= gimple_num_ops (gsi_stmt (bsi)))))
6327 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6328 true, GSI_SAME_STMT);
6329 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6331 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6332 /* As this isn't a plain copy we have to reset alignment
6333 information. */
6334 if (SSA_NAME_PTR_INFO (comp))
6335 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6339 if (gimple_code (use->stmt) == GIMPLE_PHI)
6341 ass = gimple_build_assign (tgt, comp);
6342 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6344 bsi = gsi_for_stmt (use->stmt);
6345 remove_phi_node (&bsi, false);
6347 else
6349 gimple_assign_set_rhs_from_tree (&bsi, comp);
6350 use->stmt = gsi_stmt (bsi);
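/* In the GIMPLE_PHI case above, a use such as (SSA names illustrative)

     tgt_1 = PHI <i_5 (preheader), i_9 (latch)>

   is turned into an explicit assignment `tgt_1 = comp;' placed after the
   labels of the PHI's block, and the PHI node itself is removed; assign
   uses are rewritten in place instead.  */
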
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms the
   code into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

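/* After the move, the candidate is marked IP_BEFORE_USE with
   INCREMENTED_AT pointing at the memory reference, so the subsequent
   get_computation_aff call in rewrite_use_address sees the incremented
   value and folds the difference into the constant offset -- the 16 -> 8
   adjustment in the example above.  */
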
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

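/* The reference built by create_mem_ref (tree-ssa-address.c) is a
   TARGET_MEM_REF, shaped to fit an addressing mode that is actually valid
   on the target, so RTL expansion can usually turn the rewritten access
   into a single memory operand.  */
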
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

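/* When elimination succeeds (BOUND is non-NULL above), an exit test such
   as (SSA names illustrative)

     if (i_7 < n_4)

   is typically rewritten into a comparison of the selected candidate
   against a bound precomputed in the preheader, e.g.

     if (ivtmp_13 != _42)

   which frees the original counter for removal by remove_unused_ivs.  */
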
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;

              /* Pick the selected candidate that best resembles the
                 removed IV: an identical step is worth 4, a matching
                 machine mode of the base 2, and a constant base 1, so
                 step equality dominates the choice.  */
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

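/* The debug-bind handling above means that with -g the value of an
   eliminated IV stays available in debug statements, re-expressed in
   terms of a surviving candidate; since debug binds generate no code,
   this preserves debug info without affecting the optimized loop.  */
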
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      /* Grow to twice the current number of SSA names so that new names
         created by subsequent loops do not force a reallocation each
         time.  */
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY includes any function calls,
   not counting inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

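/* The result is stored in data->body_includes_call below and feeds the
   register-pressure estimate (see estimate_reg_pressure_cost): a call in
   the body clobbers the call-clobbered registers, leaving fewer registers
   for induction variables and so raising the cost of larger IV sets.  */
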
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
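
/* Visiting loops innermost-first means each inner loop is already in its
   final, rewritten form when an enclosing loop is analyzed, so the outer
   loop's IV choices are made against the code that will actually run.  */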