/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
66 #include "coretypes.h"
72 #include "tree-pass.h"
76 #include "insn-config.h"
80 #include "gimple-pretty-print.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
98 #include "tree-scalar-evolution.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
110 /* The infinite cost. */
111 #define INFTY 10000000
113 #define AVG_LOOP_NITER(LOOP) 5
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop
*loop
)
122 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
124 return AVG_LOOP_NITER (loop
);
129 /* Representation of the induction variable. */
132 tree base
; /* Initial value of the iv. */
133 tree base_object
; /* A memory object to that the induction variable points. */
134 tree step
; /* Step of the iv (constant only). */
135 tree ssa_name
; /* The ssa name with the value. */
136 unsigned use_id
; /* The identifier in the use if it is the case. */
137 bool biv_p
; /* Is it a biv? */
138 bool have_use_for
; /* Do we already have a use for it? */
139 bool no_overflow
; /* True if the iv doesn't overflow. */
140 bool have_address_use
;/* For biv, indicate if it's used in any address
144 /* Per-ssa version information (induction variable descriptions, etc.). */
147 tree name
; /* The ssa name. */
148 struct iv
*iv
; /* Induction variable description. */
149 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv
; /* For the original biv, whether to preserve it. */
152 unsigned inv_id
; /* Id of an invariant. */
158 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
159 USE_ADDRESS
, /* Use in an address. */
160 USE_COMPARE
/* Use is a compare. */
163 /* Cost of a computation. */
166 int cost
; /* The runtime cost. */
167 unsigned complexity
; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
173 static const comp_cost no_cost
= {0, 0};
174 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
176 /* The candidate - cost pair. */
179 struct iv_cand
*cand
; /* The candidate. */
180 comp_cost cost
; /* The cost. */
181 bitmap depends_on
; /* The list of invariants that have to be
183 tree value
; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp
; /* For iv elimination, the comparison. */
187 int inv_expr_id
; /* Loop invariant expression id. */
193 unsigned id
; /* The id of the use. */
194 unsigned sub_id
; /* The id of the sub use. */
195 enum use_type type
; /* Type of the use. */
196 struct iv
*iv
; /* The induction variable it is based on. */
197 gimple
*stmt
; /* Statement in that it occurs. */
198 tree
*op_p
; /* The place where it occurs. */
199 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
202 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
203 struct cost_pair
*cost_map
;
204 /* The costs wrto the iv candidates. */
206 struct iv_cand
*selected
;
207 /* The selected candidate. */
209 struct iv_use
*next
; /* The next sub use. */
210 tree addr_base
; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset
;
212 /* Const offset stripped from base address. */
215 /* The position where the iv is computed. */
218 IP_NORMAL
, /* At the end, just before the exit condition. */
219 IP_END
, /* At the end of the latch block. */
220 IP_BEFORE_USE
, /* Immediately before a specific use. */
221 IP_AFTER_USE
, /* Immediately after a specific use. */
222 IP_ORIGINAL
/* The original biv. */
225 /* The induction variable candidate. */
228 unsigned id
; /* The number of the candidate. */
229 bool important
; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
232 gimple
*incremented_at
;/* For original biv, the statement where it is
234 tree var_before
; /* The variable used for it before increment. */
235 tree var_after
; /* The variable used for it after increment. */
236 struct iv
*iv
; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost
; /* Cost of the candidate. */
241 unsigned cost_step
; /* Cost of the candidate's increment operation. */
242 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on
; /* The list of invariants that are used in step of the
246 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
255 /* IV uses from which this common candidate is derived. */
256 auto_vec
<iv_use
*> uses
;
260 /* Hashtable helpers. */
262 struct iv_common_cand_hasher
: delete_ptr_hash
<iv_common_cand
>
264 static inline hashval_t
hash (const iv_common_cand
*);
265 static inline bool equal (const iv_common_cand
*, const iv_common_cand
*);
268 /* Hash function for possible common candidates. */
271 iv_common_cand_hasher::hash (const iv_common_cand
*ccand
)
276 /* Hash table equality function for common candidates. */
279 iv_common_cand_hasher::equal (const iv_common_cand
*ccand1
,
280 const iv_common_cand
*ccand2
)
282 return (ccand1
->hash
== ccand2
->hash
283 && operand_equal_p (ccand1
->base
, ccand2
->base
, 0)
284 && operand_equal_p (ccand1
->step
, ccand2
->step
, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1
->base
))
286 == TYPE_PRECISION (TREE_TYPE (ccand2
->base
))));
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
297 /* Hashtable helpers. */
299 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
301 static inline hashval_t
hash (const iv_inv_expr_ent
*);
302 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
305 /* Hash function for loop invariant expressions. */
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
313 /* Hash table equality function for expressions. */
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
317 const iv_inv_expr_ent
*expr2
)
319 return expr1
->hash
== expr2
->hash
320 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
325 /* The currently optimized loop. */
326 struct loop
*current_loop
;
327 source_location loop_loc
;
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map
<edge
, tree_niter_desc
*> *niters
;
332 /* Number of registers used in it. */
335 /* The size of version_info array allocated. */
336 unsigned version_info_size
;
338 /* The array of information for the ssa names. */
339 struct version_info
*version_info
;
341 /* The hashtable of loop invariant expressions created
343 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
345 /* Loop invariant expression id. */
348 /* The bitmap of indices in version_info whose value was changed. */
351 /* The uses of induction variables. */
352 vec
<iv_use
*> iv_uses
;
354 /* The candidates. */
355 vec
<iv_cand
*> iv_candidates
;
357 /* A bitmap of important candidates. */
358 bitmap important_candidates
;
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table
<iv_common_cand_hasher
> *iv_common_cand_tab
;
366 /* The common candidates. */
367 vec
<iv_common_cand
*> iv_common_cands
;
369 /* The maximum invariant id. */
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr
;
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack
;
378 /* Whether to consider just related and important candidates when replacing a
380 bool consider_all_candidates
;
382 /* Are we optimizing for speed? */
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call
;
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p
;
392 /* An assignment of iv candidates to uses. */
396 /* The number of uses covered by the assignment. */
399 /* Number of uses that cannot be expressed by the candidates in the set. */
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair
**cand_for_use
;
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses
;
408 /* The candidates used. */
411 /* The number of candidates in the set. */
414 /* Total number of registers needed. */
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost
;
420 /* Total cost of candidates. */
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses
;
426 /* The array holding the number of uses of each loop
427 invariant expressions created by ivopt. */
428 unsigned *used_inv_expr
;
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr
;
433 /* Total cost of the assignment. */
437 /* Difference of two iv candidate assignments. */
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair
*old_cp
;
447 /* A new assignment. */
448 struct cost_pair
*new_cp
;
450 /* Next change in the list. */
451 struct iv_ca_delta
*next_change
;
454 /* Bound on number of candidates below that all candidates are considered. */
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
465 /* If there are at most this number of ivs in the set, try removing unnecessary
466 ivs from the set always. */
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
471 /* The list of trees for that the decl_rtl field must be reset is stored
474 static vec
<tree
> decl_rtl_to_reset
;
476 static comp_cost
force_expr_to_var_cost (tree
, bool);
478 /* Number of uses recorded in DATA. */
480 static inline unsigned
481 n_iv_uses (struct ivopts_data
*data
)
483 return data
->iv_uses
.length ();
486 /* Ith use recorded in DATA. */
488 static inline struct iv_use
*
489 iv_use (struct ivopts_data
*data
, unsigned i
)
491 return data
->iv_uses
[i
];
494 /* Number of candidates recorded in DATA. */
496 static inline unsigned
497 n_iv_cands (struct ivopts_data
*data
)
499 return data
->iv_candidates
.length ();
502 /* Ith candidate recorded in DATA. */
504 static inline struct iv_cand
*
505 iv_cand (struct ivopts_data
*data
, unsigned i
)
507 return data
->iv_candidates
[i
];
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
513 single_dom_exit (struct loop
*loop
)
515 edge exit
= single_exit (loop
);
520 if (!just_once_each_iteration_p (loop
, exit
->src
))
526 /* Dumps information about the induction variable IV to FILE. */
529 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
531 if (iv
->ssa_name
&& dump_name
)
533 fprintf (file
, "ssa name ");
534 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
535 fprintf (file
, "\n");
538 fprintf (file
, " type ");
539 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
540 fprintf (file
, "\n");
544 fprintf (file
, " base ");
545 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
546 fprintf (file
, "\n");
548 fprintf (file
, " step ");
549 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
550 fprintf (file
, "\n");
554 fprintf (file
, " invariant ");
555 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
556 fprintf (file
, "\n");
561 fprintf (file
, " base object ");
562 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
563 fprintf (file
, "\n");
567 fprintf (file
, " is a biv\n");
570 fprintf (file
, " iv doesn't overflow wrto loop niter\n");
573 /* Dumps information about the USE to FILE. */
576 dump_use (FILE *file
, struct iv_use
*use
)
578 fprintf (file
, "use %d", use
->id
);
580 fprintf (file
, ".%d", use
->sub_id
);
582 fprintf (file
, "\n");
586 case USE_NONLINEAR_EXPR
:
587 fprintf (file
, " generic\n");
591 fprintf (file
, " address\n");
595 fprintf (file
, " compare\n");
602 fprintf (file
, " in statement ");
603 print_gimple_stmt (file
, use
->stmt
, 0, 0);
604 fprintf (file
, "\n");
606 fprintf (file
, " at position ");
608 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
609 fprintf (file
, "\n");
611 dump_iv (file
, use
->iv
, false);
613 if (use
->related_cands
)
615 fprintf (file
, " related candidates ");
616 dump_bitmap (file
, use
->related_cands
);
620 /* Dumps information about the uses to FILE. */
623 dump_uses (FILE *file
, struct ivopts_data
*data
)
628 for (i
= 0; i
< n_iv_uses (data
); i
++)
630 use
= iv_use (data
, i
);
633 dump_use (file
, use
);
637 fprintf (file
, "\n");
641 /* Dumps information about induction variable candidate CAND to FILE. */
644 dump_cand (FILE *file
, struct iv_cand
*cand
)
646 struct iv
*iv
= cand
->iv
;
648 fprintf (file
, "candidate %d%s\n",
649 cand
->id
, cand
->important
? " (important)" : "");
651 if (cand
->depends_on
)
653 fprintf (file
, " depends on ");
654 dump_bitmap (file
, cand
->depends_on
);
659 fprintf (file
, " final value replacement\n");
663 if (cand
->var_before
)
665 fprintf (file
, " var_before ");
666 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
667 fprintf (file
, "\n");
671 fprintf (file
, " var_after ");
672 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
673 fprintf (file
, "\n");
679 fprintf (file
, " incremented before exit test\n");
683 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
687 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
691 fprintf (file
, " incremented at end\n");
695 fprintf (file
, " original biv\n");
699 dump_iv (file
, iv
, false);
702 /* Returns the info for ssa version VER. */
704 static inline struct version_info
*
705 ver_info (struct ivopts_data
*data
, unsigned ver
)
707 return data
->version_info
+ ver
;
710 /* Returns the info for ssa name NAME. */
712 static inline struct version_info
*
713 name_info (struct ivopts_data
*data
, tree name
)
715 return ver_info (data
, SSA_NAME_VERSION (name
));
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
722 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
724 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
728 if (sbb
== loop
->latch
)
734 return stmt
== last_stmt (bb
);
737 /* Returns true if STMT if after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
742 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
744 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
745 basic_block stmt_bb
= gimple_bb (stmt
);
747 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
750 if (stmt_bb
!= cand_bb
)
754 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
756 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
759 /* Returns true if STMT if after the place where the induction variable
760 CAND is incremented in LOOP. */
763 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
771 return stmt_after_ip_normal_pos (loop
, stmt
);
775 return stmt_after_inc_pos (cand
, stmt
, false);
778 return stmt_after_inc_pos (cand
, stmt
, true);
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
788 abnormal_ssa_name_p (tree exp
)
793 if (TREE_CODE (exp
) != SSA_NAME
)
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
803 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
804 void *data ATTRIBUTE_UNUSED
)
806 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
808 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
810 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
814 return !abnormal_ssa_name_p (*index
);
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
821 contains_abnormal_ssa_name_p (tree expr
)
824 enum tree_code_class codeclass
;
829 code
= TREE_CODE (expr
);
830 codeclass
= TREE_CODE_CLASS (code
);
832 if (code
== SSA_NAME
)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
835 if (code
== INTEGER_CST
836 || is_gimple_min_invariant (expr
))
839 if (code
== ADDR_EXPR
)
840 return !for_each_index (&TREE_OPERAND (expr
, 0),
841 idx_contains_abnormal_ssa_name_p
,
844 if (code
== COND_EXPR
)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
873 static struct tree_niter_desc
*
874 niter_for_exit (struct ivopts_data
*data
, edge exit
)
876 struct tree_niter_desc
*desc
;
877 tree_niter_desc
**slot
;
881 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
885 slot
= data
->niters
->get (exit
);
889 /* Try to determine number of iterations. We cannot safely work with ssa
890 names that appear in phi nodes on abnormal edges, so that we do not
891 create overlapping life ranges for them (PR 27283). */
892 desc
= XNEW (struct tree_niter_desc
);
893 if (!number_of_iterations_exit (data
->current_loop
,
895 || contains_abnormal_ssa_name_p (desc
->niter
))
900 data
->niters
->put (exit
, desc
);
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
912 static struct tree_niter_desc
*
913 niter_for_single_dom_exit (struct ivopts_data
*data
)
915 edge exit
= single_dom_exit (data
->current_loop
);
920 return niter_for_exit (data
, exit
);
923 /* Initializes data structures used by the iv optimization pass, stored
927 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
929 data
->version_info_size
= 2 * num_ssa_names
;
930 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
931 data
->relevant
= BITMAP_ALLOC (NULL
);
932 data
->important_candidates
= BITMAP_ALLOC (NULL
);
933 data
->max_inv_id
= 0;
935 data
->iv_uses
.create (20);
936 data
->iv_candidates
.create (20);
937 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
938 data
->inv_expr_id
= 0;
939 data
->name_expansion_cache
= NULL
;
940 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
941 data
->iv_common_cands
.create (20);
942 decl_rtl_to_reset
.create (20);
943 gcc_obstack_init (&data
->iv_obstack
);
946 /* Returns a memory object to that EXPR points. In case we are able to
947 determine that it does not point to any such object, NULL is returned. */
950 determine_base_object (tree expr
)
952 enum tree_code code
= TREE_CODE (expr
);
955 /* If this is a pointer casted to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr
))
959 return determine_base_object (TREE_OPERAND (expr
, 0));
961 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
970 obj
= TREE_OPERAND (expr
, 0);
971 base
= get_base_address (obj
);
976 if (TREE_CODE (base
) == MEM_REF
)
977 return determine_base_object (TREE_OPERAND (base
, 0));
979 return fold_convert (ptr_type_node
,
980 build_fold_addr_expr (base
));
982 case POINTER_PLUS_EXPR
:
983 return determine_base_object (TREE_OPERAND (expr
, 0));
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
991 return fold_convert (ptr_type_node
, expr
);
995 /* Return true if address expression with non-DECL_P operand appears
999 contain_complex_addr_expr (tree expr
)
1004 switch (TREE_CODE (expr
))
1006 case POINTER_PLUS_EXPR
:
1009 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1010 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1014 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1027 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1028 bool no_overflow
= false)
1031 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1032 sizeof (struct iv
));
1033 gcc_assert (step
!= NULL_TREE
);
1035 /* Lower address expression in base except ones with DECL_P as operand.
1037 1) More accurate cost can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1041 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1042 || contain_complex_addr_expr (expr
))
1045 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1046 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1050 iv
->base_object
= determine_base_object (base
);
1053 iv
->have_use_for
= false;
1055 iv
->ssa_name
= NULL_TREE
;
1056 iv
->no_overflow
= no_overflow
;
1057 iv
->have_address_use
= false;
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1066 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1069 struct version_info
*info
= name_info (data
, iv
);
1071 gcc_assert (!info
->iv
);
1073 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1074 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1075 info
->iv
->ssa_name
= iv
;
1078 /* Finds induction variable declaration for VAR. */
1081 get_iv (struct ivopts_data
*data
, tree var
)
1084 tree type
= TREE_TYPE (var
);
1086 if (!POINTER_TYPE_P (type
)
1087 && !INTEGRAL_TYPE_P (type
))
1090 if (!name_info (data
, var
)->iv
)
1092 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1095 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1096 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1099 return name_info (data
, var
)->iv
;
1102 /* Return the first non-invariant ssa var found in EXPR. */
1105 extract_single_var_from_expr (tree expr
)
1109 enum tree_code code
;
1111 if (!expr
|| is_gimple_min_invariant (expr
))
1114 code
= TREE_CODE (expr
);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1117 n
= TREE_OPERAND_LENGTH (expr
);
1118 for (i
= 0; i
< n
; i
++)
1120 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1126 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1129 /* Finds basic ivs. */
1132 find_bivs (struct ivopts_data
*data
)
1136 tree step
, type
, base
, stop
;
1138 struct loop
*loop
= data
->current_loop
;
1141 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1148 if (virtual_operand_p (PHI_RESULT (phi
)))
1151 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1154 if (integer_zerop (iv
.step
))
1158 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1159 /* Stop expanding iv base at the first ssa var referred by iv step.
1160 Ideally we should stop at any ssa var, because that's expensive
1161 and unusual to happen, we just do it on the first one.
1163 See PR64705 for the rationale. */
1164 stop
= extract_single_var_from_expr (step
);
1165 base
= expand_simple_operations (base
, stop
);
1166 if (contains_abnormal_ssa_name_p (base
)
1167 || contains_abnormal_ssa_name_p (step
))
1170 type
= TREE_TYPE (PHI_RESULT (phi
));
1171 base
= fold_convert (type
, base
);
1174 if (POINTER_TYPE_P (type
))
1175 step
= convert_to_ptrofftype (step
);
1177 step
= fold_convert (type
, step
);
1180 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1187 /* Marks basic ivs. */
1190 mark_bivs (struct ivopts_data
*data
)
1195 struct iv
*iv
, *incr_iv
;
1196 struct loop
*loop
= data
->current_loop
;
1197 basic_block incr_bb
;
1200 data
->bivs_not_used_in_addr
= 0;
1201 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1205 iv
= get_iv (data
, PHI_RESULT (phi
));
1209 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1210 def
= SSA_NAME_DEF_STMT (var
);
1211 /* Don't mark iv peeled from other one as biv. */
1213 && gimple_code (def
) == GIMPLE_PHI
1214 && gimple_bb (def
) == loop
->header
)
1217 incr_iv
= get_iv (data
, var
);
1221 /* If the increment is in the subloop, ignore it. */
1222 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1223 if (incr_bb
->loop_father
!= data
->current_loop
1224 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1228 incr_iv
->biv_p
= true;
1229 if (iv
->no_overflow
)
1230 data
->bivs_not_used_in_addr
++;
1231 if (incr_iv
->no_overflow
)
1232 data
->bivs_not_used_in_addr
++;
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1240 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1243 struct loop
*loop
= data
->current_loop
;
1245 iv
->base
= NULL_TREE
;
1246 iv
->step
= NULL_TREE
;
1248 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1251 lhs
= gimple_assign_lhs (stmt
);
1252 if (TREE_CODE (lhs
) != SSA_NAME
)
1255 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1258 /* Stop expanding iv base at the first ssa var referred by iv step.
1259 Ideally we should stop at any ssa var, because that's expensive
1260 and unusual to happen, we just do it on the first one.
1262 See PR64705 for the rationale. */
1263 stop
= extract_single_var_from_expr (iv
->step
);
1264 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1265 if (contains_abnormal_ssa_name_p (iv
->base
)
1266 || contains_abnormal_ssa_name_p (iv
->step
))
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270 While this will suppress optimizations, we can not safely delete this
1271 GIV and associated statements, even if it appears it is not used. */
1272 if (stmt_could_throw_p (stmt
))
1278 /* Finds general ivs in statement STMT. */
1281 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1285 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1288 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1291 /* Finds general ivs in basic block BB. */
1294 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1296 gimple_stmt_iterator bsi
;
1298 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1299 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1302 /* Finds general ivs. */
1305 find_givs (struct ivopts_data
*data
)
1307 struct loop
*loop
= data
->current_loop
;
1308 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1311 for (i
= 0; i
< loop
->num_nodes
; i
++)
1312 find_givs_in_bb (data
, body
[i
]);
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1320 find_induction_variables (struct ivopts_data
*data
)
1325 if (!find_bivs (data
))
1331 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1333 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1337 fprintf (dump_file
, " number of iterations ");
1338 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1339 if (!integer_zerop (niter
->may_be_zero
))
1341 fprintf (dump_file
, "; zero if ");
1342 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1344 fprintf (dump_file
, "\n\n");
1347 fprintf (dump_file
, "Induction variables:\n\n");
1349 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1351 if (ver_info (data
, i
)->iv
)
1352 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
/* NOTE(review): partial extraction — assignments of iv/stmt/op_p and the
   final return of USE (present in the numbered gaps) are missing; comments only.
   Allocates a fresh iv_use, fills in the visible fields, and appends it to
   DATA->iv_uses.  Uses C++ default arguments, so this file is compiled as C++.  */
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1364 static struct iv_use
*
1365 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1366 gimple
*stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1367 unsigned HOST_WIDE_INT addr_offset
= 0)
/* XCNEW zero-initializes, so unlisted fields start as 0/NULL.  */
1369 struct iv_use
*use
= XCNEW (struct iv_use
);
/* Id equals the current number of uses, i.e. its index after the push.  */
1371 use
->id
= n_iv_uses (data
);
1373 use
->type
= use_type
;
1377 use
->related_cands
= BITMAP_ALLOC (NULL
);
1379 use
->addr_base
= addr_base
;
1380 use
->addr_offset
= addr_offset
;
1382 data
->iv_uses
.safe_push (use
);
/* NOTE(review): partial extraction — the list-walk setup around lines
   1416-1424 and the tail of the insertion logic are missing; the visible
   `pre` at the end is declared in a dropped line — TODO confirm against the
   full source.  Comments only.
   Records USE as a sub-use of the existing group ID_GROUP, keeping the
   group's singly-linked sub-use list sorted by ascending addr_offset; if the
   new use has the smallest offset it becomes the new group head.  */
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1390 static struct iv_use
*
1391 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1392 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
,
1393 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1394 unsigned int id_group
)
1396 struct iv_use
*use
= XCNEW (struct iv_use
);
1397 struct iv_use
*group
= iv_use (data
, id_group
);
/* Sub-uses share the id of their group leader.  */
1399 use
->id
= group
->id
;
1401 use
->type
= use_type
;
/* Only the group head owns a related_cands bitmap.  */
1405 use
->related_cands
= NULL
;
1406 use
->addr_base
= addr_base
;
1407 use
->addr_offset
= addr_offset
;
1409 /* Sub use list is maintained in offset ascending order. */
1410 if (addr_offset
<= group
->addr_offset
)
/* New use becomes the group head: steal the bitmap and replace the slot.  */
1412 use
->related_cands
= group
->related_cands
;
1413 group
->related_cands
= NULL
;
1415 data
->iv_uses
[id_group
] = use
;
/* Otherwise advance until the insertion point is found.  */
1423 group
= group
->next
;
1425 while (group
&& addr_offset
> group
->addr_offset
);
1426 use
->next
= pre
->next
;
/* NOTE(review): partial extraction — the return type, braces, and the guard
   condition that pairs with the `&& flow_bb_inside_loop_p` fragment are
   missing; comments only.
   Marks OP (an SSA name defined outside the current loop) as a relevant
   loop invariant, assigning it a fresh invariant id.  */
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1438 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1441 struct version_info
*info
;
/* Only real (non-virtual) SSA names can be invariants of interest.  */
1443 if (TREE_CODE (op
) != SSA_NAME
1444 || virtual_operand_p (op
))
1447 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
/* Defined inside the loop -> not an invariant (guard partly dropped).  */
1449 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1452 info
= name_info (data
, op
);
/* Sticky flag: any nonlinear use keeps the invariant from being eliminable.  */
1454 info
->has_nonlin_use
|= nonlinear_use
;
1456 info
->inv_id
= ++data
->max_inv_id
;
1457 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
/* NOTE(review): partial extraction — early returns and some declarations are
   in the dropped lines; comments only.
   Records a nonlinear-expression use of SSA name OP: reuses the existing use
   if OP already has one, treats a zero-step iv as an invariant, otherwise
   creates a new USE_NONLINEAR_EXPR use.  */
1460 /* Checks whether the use OP is interesting and if so, records it. */
1462 static struct iv_use
*
1463 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1469 if (TREE_CODE (op
) != SSA_NAME
)
1472 iv
= get_iv (data
, op
);
/* Already recorded once: return the previously created use.  */
1476 if (iv
->have_use_for
)
1478 use
= iv_use (data
, iv
->use_id
);
1480 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
/* Zero step means OP is loop invariant, not an iv use.  */
1484 if (integer_zerop (iv
->step
))
1486 record_invariant (data
, op
, true);
1489 iv
->have_use_for
= true;
1491 stmt
= SSA_NAME_DEF_STMT (op
);
1492 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1493 || is_gimple_assign (stmt
));
1495 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
/* Remember the use id so later lookups can short-circuit above.  */
1496 iv
->use_id
= use
->id
;
/* NOTE(review): partial extraction — declarations of `zero`, `ret`, braces,
   and the out-parameter stores near the end are in dropped lines; comments only.
   Decomposes the comparison in STMT into (iv, invariant) operand pairs; the
   static const_iv with zero step stands in for constant operands.  */
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1510 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1511 tree
**control_var
, tree
**bound
,
1512 struct iv
**iv_var
, struct iv
**iv_bound
)
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv
;
1517 tree
*op0
= &zero
, *op1
= &zero
;
1518 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
/* GIMPLE_COND carries its operands directly; otherwise this is a
   comparison on the rhs of an assignment.  */
1521 if (gimple_code (stmt
) == GIMPLE_COND
)
1523 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1524 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1525 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1529 op0
= gimple_assign_rhs1_ptr (stmt
);
1530 op1
= gimple_assign_rhs2_ptr (stmt
);
1533 zero
= integer_zero_node
;
1534 const_iv
.step
= integer_zero_node
;
1536 if (TREE_CODE (*op0
) == SSA_NAME
)
1537 iv0
= get_iv (data
, *op0
);
1538 if (TREE_CODE (*op1
) == SSA_NAME
)
1539 iv1
= get_iv (data
, *op1
);
1541 /* Exactly one of the compared values must be an iv, and the other one must
1546 if (integer_zerop (iv0
->step
))
1548 /* Control variable may be on the other side. */
1549 std::swap (op0
, op1
);
1550 std::swap (iv0
, iv1
);
/* Success iff op0 is a genuine iv and op1 is invariant (zero step).  */
1552 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
/* NOTE(review): partial extraction — `var_iv` declaration and braces are in
   dropped lines; comments only.
   If STMT is not an iv-vs-invariant compare, records both operands as plain
   uses; otherwise records a USE_COMPARE use of the control iv.  */
1567 /* Checks whether the condition in STMT is interesting and if so,
1571 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1573 tree
*var_p
, *bound_p
;
1576 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
/* Not a recognizable iv compare: fall back to generic operand uses.  */
1578 find_interesting_uses_op (data
, *var_p
);
1579 find_interesting_uses_op (data
, *bound_p
);
1583 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
/* NOTE(review): partial extraction — NULL-returning branches and some
   declarations are in dropped lines; comments only.
   Computes, recursively over EXPR's operands, the outermost loop in which
   EXPR is obviously invariant; tree_root represents "invariant everywhere".  */
1586 /* Returns the outermost loop EXPR is obviously invariant in
1587 relative to the loop LOOP, i.e. if all its operands are defined
1588 outside of the returned loop. Returns NULL if EXPR is not
1589 even obviously invariant in LOOP. */
1592 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
/* Constants are invariant in every loop.  */
1597 if (is_gimple_min_invariant (expr
))
1598 return current_loops
->tree_root
;
1600 if (TREE_CODE (expr
) == SSA_NAME
)
1602 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
/* Defined inside LOOP: invariant only in subloops below the def.  */
1605 if (flow_bb_inside_loop_p (loop
, def_bb
))
1607 return superloop_at_depth (loop
,
1608 loop_depth (def_bb
->loop_father
) + 1);
1611 return current_loops
->tree_root
;
/* Compound expression: the answer is constrained by the deepest operand.  */
1617 unsigned maxdepth
= 0;
1618 len
= TREE_OPERAND_LENGTH (expr
);
1619 for (i
= 0; i
< len
; i
++)
1621 struct loop
*ivloop
;
1622 if (!TREE_OPERAND (expr
, i
))
1625 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1628 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1631 return superloop_at_depth (loop
, maxdepth
);
/* NOTE(review): partial extraction — the true/false returns and part of the
   SSA-name guard are in dropped lines; comments only.
   Predicate: EXPR is invariant in LOOP iff it is a constant, an SSA name
   defined outside LOOP, or a tree all of whose operands are invariant.  */
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1639 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
/* Whole-function "loop" would make the question meaningless.  */
1644 gcc_assert (loop_depth (loop
) > 0);
1646 if (is_gimple_min_invariant (expr
))
1649 if (TREE_CODE (expr
) == SSA_NAME
)
1651 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
/* Definition inside LOOP -> not invariant (condition partly dropped).  */
1653 && flow_bb_inside_loop_p (loop
, def_bb
))
/* Recurse into all operands of a compound expression.  */
1662 len
= TREE_OPERAND_LENGTH (expr
);
1663 for (i
= 0; i
< len
; i
++)
1664 if (TREE_OPERAND (expr
, i
)
1665 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
/* NOTE(review): partial extraction — the switch statement around the visible
   `case POINTER_PLUS_EXPR:` label, several returns, and local declarations
   are in dropped lines; comments only.
   Walks SSA definition chains starting at EXPR to locate the basic induction
   variable it is derived from.  */
1671 /* Given expression EXPR which computes inductive values with respect
1672 to loop recorded in DATA, this function returns biv from which EXPR
1673 is derived by tracing definition chains of ssa variables in EXPR. */
1676 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1681 enum tree_code code
;
1684 if (expr
== NULL_TREE
)
1687 if (is_gimple_min_invariant (expr
))
1690 code
= TREE_CODE (expr
);
/* Compound expression: search each operand for a deriving biv.  */
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1693 n
= TREE_OPERAND_LENGTH (expr
);
1694 for (i
= 0; i
< n
; i
++)
1696 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1702 /* Stop if it's not ssa name. */
1703 if (code
!= SSA_NAME
)
1706 iv
= get_iv (data
, expr
);
/* Not an iv (or an invariant one) -> nothing to trace.  */
1707 if (!iv
|| integer_zerop (iv
->step
))
1712 stmt
= SSA_NAME_DEF_STMT (expr
);
/* PHI definition: recurse through each real (non-virtual) argument.  */
1713 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1716 use_operand_p use_p
;
1718 if (virtual_operand_p (gimple_phi_result (phi
)))
1721 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1723 tree use
= USE_FROM_PTR (use_p
);
1724 iv
= find_deriving_biv_for_expr (data
, use
);
1730 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1733 e1
= gimple_assign_rhs1 (stmt
);
1734 code
= gimple_assign_rhs_code (stmt
);
/* Simple copy: follow the single rhs.  */
1735 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1736 return find_deriving_biv_for_expr (data
, e1
);
1743 case POINTER_PLUS_EXPR
:
1744 /* Increments, decrements and multiplications by a constant
1746 e2
= gimple_assign_rhs2 (stmt
);
1747 iv
= find_deriving_biv_for_expr (data
, e2
);
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data
, e1
);
/* NOTE(review): partial extraction — bitmap iterator declarations, `continue`
   statements inside the scan loop, and braces are in dropped lines; comments only.
   Marks BIV as used in an address-type use, and also marks sibling bivs with
   the same step whose base differs from BIV's by exactly one step
   (predecessor/successor ivs).  */
1763 /* Record BIV, its predecessor and successor that they are used in
1764 address type uses. */
1767 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1770 tree type
, base_1
, base_2
;
/* Only unmarked, non-wrapping, genuine bivs qualify.  */
1773 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1774 || biv
->have_address_use
|| !biv
->no_overflow
)
1777 type
= TREE_TYPE (biv
->base
);
1778 if (!INTEGRAL_TYPE_P (type
))
1781 biv
->have_address_use
= true;
1782 data
->bivs_not_used_in_addr
--;
/* base_1 = BIV's value after one increment; used to detect successors.  */
1783 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1784 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1786 struct iv
*iv
= ver_info (data
, i
)->iv
;
1788 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1789 || iv
->have_address_use
|| !iv
->no_overflow
)
1792 if (type
!= TREE_TYPE (iv
->base
)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
/* Same step is required for the one-step-offset relationship to hold.  */
1796 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1799 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
/* IV is BIV's successor (base_1 == iv->base) or predecessor
   (base_2 == biv->base).  */
1800 if (operand_equal_p (base_1
, iv
->base
, 0)
1801 || operand_equal_p (base_2
, biv
->base
, 0))
1803 iv
->have_address_use
= true;
1804 data
->bivs_not_used_in_addr
--;
/* NOTE(review): partial extraction — the remaining struct members (a `stmt`
   and a `step` field, judging by uses in idx_find_step below — TODO confirm)
   and the closing brace are in dropped lines; comments only.
   Callback-state bundle threaded through for_each_index by
   find_interesting_uses_address.  */
1809 /* Cumulates the steps of indices into DATA and replaces their values with the
1810 initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1813 struct ifs_ivopts_data
1815 struct ivopts_data
*ivopts_data
;
/* NOTE(review): partial extraction — true/false returns, the replacement of
   *IDX with the iv base, and several declarations are in dropped lines;
   comments only.
   for_each_index callback: accumulates STEP contributions of index *IDX into
   DTA->step (scaled by element size, converted to sizetype).  */
1821 idx_find_step (tree base
, tree
*idx
, void *data
)
1823 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1825 bool use_overflow_semantics
= false;
1826 tree step
, iv_base
, iv_step
, lbound
, off
;
1827 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1829 /* If base is a component ref, require that the offset of the reference
1831 if (TREE_CODE (base
) == COMPONENT_REF
)
1833 off
= component_ref_field_offset (base
);
1834 return expr_invariant_in_loop_p (loop
, off
);
1837 /* If base is array, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1848 step
= array_ref_element_size (base
);
1849 lbound
= array_ref_low_bound (base
);
1851 if (!expr_invariant_in_loop_p (loop
, step
)
1852 || !expr_invariant_in_loop_p (loop
, lbound
))
/* Non-SSA index: nothing iv-related to do here.  */
1856 if (TREE_CODE (*idx
) != SSA_NAME
)
1859 iv
= get_iv (dta
->ivopts_data
, *idx
);
1863 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1864 *&x[0], which is not folded and does not trigger the
1865 ARRAY_REF path below. */
/* Invariant index contributes no step.  */
1868 if (integer_zerop (iv
->step
))
1871 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1873 step
= array_ref_element_size (base
);
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step
) != INTEGER_CST
)
1880 /* The step for pointer arithmetics already is 1 byte. */
1881 step
= size_one_node
;
/* Known-non-wrapping ivs allow a cheaper scev conversion.  */
1885 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1886 use_overflow_semantics
= true;
1888 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1889 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1890 use_overflow_semantics
))
1892 /* The index might wrap. */
/* Fold index step into the accumulated step of the whole reference.  */
1896 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1897 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
/* Track which bivs end up used in addresses (cheap on most targets).  */
1899 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
1902 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
1904 record_biv_for_address_use (dta
->ivopts_data
, iv
);
/* NOTE(review): partial extraction — the `void *vdata` parameter line and the
   `return true;` are in dropped lines; comments only.
   for_each_index callback: records the index itself, and for array
   references also the element size and low bound, as interesting uses.  */
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1913 idx_record_use (tree base
, tree
*idx
,
1916 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1917 find_interesting_uses_op (data
, *idx
);
1918 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1920 find_interesting_uses_op (data
, array_ref_element_size (base
));
1921 find_interesting_uses_op (data
, array_ref_low_bound (base
));
/* NOTE(review): partial extraction — the switch over CODE (the visible
   branches handle MULT/PLUS/MINUS/INTEGER_CST cases), negations, and several
   returns are in dropped lines; comments only.
   Recursive prover of TOP == cst * BOT; result in *MUL, sign-extended to
   TOP's precision.  */
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1932 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1935 enum tree_code code
;
1936 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1937 widest_int res
, p0
, p1
;
/* TOP == BOT: multiple is 1 (stored in a dropped line).  */
1942 if (operand_equal_p (top
, bot
, 0))
1948 code
= TREE_CODE (top
);
/* Multiplication by a constant: recurse on the other operand.  */
1952 mby
= TREE_OPERAND (top
, 1);
1953 if (TREE_CODE (mby
) != INTEGER_CST
)
1956 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1959 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
/* PLUS/MINUS: both operands must be multiples of BOT.  */
1964 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1965 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1968 if (code
== MINUS_EXPR
)
1970 *mul
= wi::sext (p0
+ p1
, precision
);
/* Constant / constant: exact division check (remainder test dropped).  */
1974 if (TREE_CODE (bot
) != INTEGER_CST
)
1977 p0
= widest_int::from (top
, SIGNED
);
1978 p1
= widest_int::from (bot
, SIGNED
);
1981 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
/* NOTE(review): partial extraction — true/false returns are in dropped
   lines; comments only.
   Conservative alignment check for strict-alignment targets: REF may be
   unaligned if its known object alignment, bit position, or STEP's
   divisibility fall short of the mode's required alignment.  */
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1992 may_be_unaligned_p (tree ref
, tree step
)
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
/* Required alignment: the larger of type and mode alignment.  */
1999 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2001 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2003 unsigned HOST_WIDE_INT bitpos
;
2004 unsigned int ref_align
;
2005 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2006 if (ref_align
< align
2007 || (bitpos
% align
) != 0
2008 || (bitpos
% BITS_PER_UNIT
) != 0)
/* Step must preserve alignment: enough low zero bits in STEP.  */
2011 unsigned int trailing_zeros
= tree_ctz (step
);
2012 if (trailing_zeros
< HOST_BITS_PER_INT
2013 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
/* NOTE(review): partial extraction — several case labels (MEM_REF,
   COMPONENT_REF, ARRAY_REF, CONVERT_EXPR — TODO confirm), `return true/false`
   lines, and braces are in dropped lines; comments only.
   Recursively decides whether taking the address of EXPR could be invalid.  */
2019 /* Return true if EXPR may be non-addressable. */
2022 may_be_nonaddressable_p (tree expr
)
2024 switch (TREE_CODE (expr
))
2026 case TARGET_MEM_REF
:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr
);
2036 if (REF_REVERSE_STORAGE_ORDER (expr
))
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
/* Reverse storage order or nonaddressable field -> nonaddressable.  */
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2047 case ARRAY_RANGE_REF
:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2052 case VIEW_CONVERT_EXPR
:
2053 /* This kind of view-conversions may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
/* Forward declaration: strip_offset is defined further below but is needed
   by record_group_use first.  (Return type line dropped by extraction.)  */
2073 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
/* NOTE(review): partial extraction — declarations of `i`, `use`, `addr_base`
   and loop braces are in dropped lines; comments only.
   Linear-scans existing uses for one with the same base object, step, and
   stripped base; records this use as a sub-use of that group, or as a new
   top-level use otherwise.  */
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076 If there is an existing use which has same stripped iv base and step,
2077 this function records this one as a sub use to that; otherwise records
2078 it as a normal one. */
2080 static struct iv_use
*
2081 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
2082 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
)
2087 unsigned HOST_WIDE_INT addr_offset
;
2089 /* Only support sub use for address type uses, that is, with base
2091 if (!iv
->base_object
)
2092 return record_use (data
, use_p
, iv
, stmt
, use_type
);
/* Separate the constant offset so group membership ignores it.  */
2094 addr_base
= strip_offset (iv
->base
, &addr_offset
);
2095 for (i
= 0; i
< n_iv_uses (data
); i
++)
2097 use
= iv_use (data
, i
);
2098 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
2103 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
2104 && operand_equal_p (addr_base
, use
->addr_base
, 0))
/* No matching group found -> start a fresh one.  */
2108 if (i
== n_iv_uses (data
))
2109 return record_use (data
, use_p
, iv
, stmt
,
2110 use_type
, addr_base
, addr_offset
);
2112 return record_sub_use (data
, use_p
, iv
, stmt
,
2113 use_type
, addr_base
, addr_offset
, i
);
/* NOTE(review): partial extraction — the `tree *op_p` parameter, goto-fail
   error paths, `civ`/`astep` declarations, and the `fail:` label with the
   fallback are in dropped lines; comments only.
   Analyzes the memory reference *OP_P in STMT, substitutes iv bases into it,
   derives its overall step, and records it as a grouped USE_ADDRESS use; on
   any failure it falls back to recording the raw indices as plain uses.  */
2116 /* Finds addresses in *OP_P inside STMT. */
2119 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2122 tree base
= *op_p
, step
= size_zero_node
;
2124 struct ifs_ivopts_data ifs_ivopts_data
;
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt
))
2131 /* Ignore bitfields for now. Not really something terribly complicated
2133 if (TREE_CODE (base
) == BIT_FIELD_REF
)
/* Work on a private copy so in-place base substitutions are safe.  */
2136 base
= unshare_expr (base
);
2138 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2140 tree type
= build_pointer_type (TREE_TYPE (base
));
/* Replace each SSA operand of the TARGET_MEM_REF by its iv base,
   accumulating iv steps into STEP.  */
2144 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2146 civ
= get_iv (data
, TMR_BASE (base
));
2150 TMR_BASE (base
) = civ
->base
;
2153 if (TMR_INDEX2 (base
)
2154 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2156 civ
= get_iv (data
, TMR_INDEX2 (base
));
2160 TMR_INDEX2 (base
) = civ
->base
;
2163 if (TMR_INDEX (base
)
2164 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2166 civ
= get_iv (data
, TMR_INDEX (base
));
2170 TMR_INDEX (base
) = civ
->base
;
/* The index's step counts TMR_STEP times per iteration.  */
2175 if (TMR_STEP (base
))
2176 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2178 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2182 if (integer_zerop (step
))
2184 base
= tree_mem_ref_addr (type
, base
);
/* Ordinary reference: let idx_find_step accumulate the step.  */
2188 ifs_ivopts_data
.ivopts_data
= data
;
2189 ifs_ivopts_data
.stmt
= stmt
;
2190 ifs_ivopts_data
.step
= size_zero_node
;
2191 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2192 || integer_zerop (ifs_ivopts_data
.step
))
2194 step
= ifs_ivopts_data
.step
;
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base
))
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2206 base
= build_fold_addr_expr (base
);
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base
) == ADDR_EXPR
)
2212 tree
*ref
= &TREE_OPERAND (base
, 0);
2213 while (handled_component_p (*ref
))
2214 ref
= &TREE_OPERAND (*ref
, 0);
2215 if (TREE_CODE (*ref
) == MEM_REF
)
2217 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2218 TREE_OPERAND (*ref
, 0),
2219 TREE_OPERAND (*ref
, 1));
/* Success: record the whole address as a grouped address use.  */
2226 civ
= alloc_iv (data
, base
, step
);
2227 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
/* Fallback path: record raw index uses instead of the address.  */
2231 for_each_index (op_p
, idx_record_use
, data
);
/* NOTE(review): partial extraction — `ssa_op_iter iter`, `tree op`, and
   braces are in dropped lines; comments only.
   Records every SSA operand of STMT (PHI or ordinary) as a potential
   invariant, with nonlinear_use = false.  */
2234 /* Finds and records invariants used in STMT. */
2237 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2240 use_operand_p use_p
;
2243 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2245 op
= USE_FROM_PTR (use_p
);
2246 record_invariant (data
, op
, false);
/* NOTE(review): partial extraction — returns after each handled case, `iv`
   declarations, braces, and part of the call-handling TODO text are in
   dropped lines; comments only.
   Dispatcher: classifies STMT (condition / assignment / PHI / other) and
   records the appropriate kinds of iv uses found in it.  */
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2253 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2256 tree op
, *lhs
, *rhs
;
2258 use_operand_p use_p
;
2259 enum tree_code code
;
/* Invariants are recorded for every statement kind.  */
2261 find_invariants_stmt (data
, stmt
);
2263 if (gimple_code (stmt
) == GIMPLE_COND
)
2265 find_interesting_uses_cond (data
, stmt
);
2269 if (is_gimple_assign (stmt
))
2271 lhs
= gimple_assign_lhs_ptr (stmt
);
2272 rhs
= gimple_assign_rhs1_ptr (stmt
);
2274 if (TREE_CODE (*lhs
) == SSA_NAME
)
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2279 iv
= get_iv (data
, *lhs
);
2281 if (iv
&& !integer_zerop (iv
->step
))
2285 code
= gimple_assign_rhs_code (stmt
);
2286 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs
)
2288 || is_gimple_val (*rhs
)))
/* Memory reference on the rhs is an address use; plain value is
   a nonlinear-expression use.  */
2290 if (REFERENCE_CLASS_P (*rhs
))
2291 find_interesting_uses_address (data
, stmt
, rhs
);
2293 find_interesting_uses_op (data
, *rhs
);
2295 if (REFERENCE_CLASS_P (*lhs
))
2296 find_interesting_uses_address (data
, stmt
, lhs
);
2299 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2301 find_interesting_uses_cond (data
, stmt
);
2305 /* TODO -- we should also handle address uses of type
2307 memory = call (whatever);
/* Loop-header PHIs that define an iv are not uses themselves.  */
2314 if (gimple_code (stmt
) == GIMPLE_PHI
2315 && gimple_bb (stmt
) == data
->current_loop
->header
)
2317 iv
= get_iv (data
, PHI_RESULT (stmt
));
2319 if (iv
&& !integer_zerop (iv
->step
))
/* Generic fallback: every SSA operand that is an iv becomes a use.  */
2323 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2325 op
= USE_FROM_PTR (use_p
);
2327 if (TREE_CODE (op
) != SSA_NAME
)
2330 iv
= get_iv (data
, op
);
2334 find_interesting_uses_op (data
, op
);
/* NOTE(review): partial extraction — `gphi_iterator psi`, `gphi *phi`, `tree
   def`, and braces are in dropped lines; comments only.
   For each PHI in the exit destination block, records the value flowing out
   of the loop along EXIT as an interesting use (virtual PHIs skipped).  */
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2342 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2348 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2351 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2352 if (!virtual_operand_p (def
))
2353 find_interesting_uses_op (data
, def
);
/* NOTE(review): partial extraction — `bb = body[i]`, edge-iterator
   declarations, the dump of uses, the free of BODY, and braces are in dropped
   lines; comments only.
   Top-level pass over the loop body: records uses on loop-exiting edges,
   then in all PHIs and real (non-debug) statements; finally dumps the
   invariant summary when detailed dumping is on.  */
2357 /* Finds uses of the induction variables that are interesting. */
2360 find_interesting_uses (struct ivopts_data
*data
)
2363 gimple_stmt_iterator bsi
;
2364 basic_block
*body
= get_loop_body (data
->current_loop
);
2366 struct version_info
*info
;
2369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2370 fprintf (dump_file
, "Uses:\n\n");
2372 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
/* Edges leaving the loop carry values live after the loop.  */
2377 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2378 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2379 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2380 find_interesting_uses_outside (data
, e
);
2382 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2383 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
/* Debug statements must not influence code generation decisions.  */
2384 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2385 if (!is_gimple_debug (gsi_stmt (bsi
)))
2386 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
/* Diagnostics: list relevant invariants and whether they are eliminable.  */
2389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2393 fprintf (dump_file
, "\n");
2395 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2397 info
= ver_info (data
, i
);
2400 fprintf (dump_file
, " ");
2401 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2402 fprintf (dump_file
, " is invariant (%d)%s\n",
2403 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2407 fprintf (dump_file
, "\n");
/* NOTE(review): partial extraction — declarations of `as`, `reg`, `addr`,
   `width`, the cache-hit early return, loop braces, and the final return are
   in dropped lines; comments only.
   Probes the target (via memory_address_addr_space_p on a synthetic
   reg+const RTX) for the largest constant offset usable in [base + offset]
   addressing for USE's mode/address space; results are memoized per
   (address space, mode) in a function-static vec.  */
2413 /* Compute maximum offset of [base + offset] addressing mode
2414 for memory reference represented by USE. */
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use
*use
)
2421 HOST_WIDE_INT i
, off
;
2422 unsigned list_index
, num
;
2424 machine_mode mem_mode
, addr_mode
;
2425 static vec
<HOST_WIDE_INT
> max_offset_list
;
2427 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2428 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
/* Grow the memo table on demand; -1 marks "not computed yet".  */
2430 num
= max_offset_list
.length ();
2431 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2432 if (list_index
>= num
)
2434 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2435 for (; num
< max_offset_list
.length (); num
++)
2436 max_offset_list
[num
] = -1;
2439 off
= max_offset_list
[list_index
];
/* Build a throwaway (reg + const) address for legitimacy probing.  */
2443 addr_mode
= targetm
.addr_space
.address_mode (as
);
2444 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2445 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2447 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2448 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2449 width
= HOST_BITS_PER_WIDE_INT
- 1;
/* Try offsets 2^i - 1 from large to small until one is accepted.  */
2451 for (i
= width
; i
> 0; i
--)
2453 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2454 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2455 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2461 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2463 off
-= GET_MODE_SIZE (mem_mode
);
2464 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2465 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
/* Memoize the answer for subsequent queries.  */
2472 max_offset_list
[list_index
] = off
;
/* NOTE(review): partial extraction — the `distinct` counting logic, the
   group-skip test, the split_p update, and the return are in dropped lines;
   comments only.
   Scans all address-use groups counting distinct offsets; decides whether
   every small group should be split back into individual uses.  */
2476 /* Check if all small groups should be split. Return true if and
2479 1) At least one groups contain two uses with different offsets.
2480 2) No group contains more than two uses with different offsets.
2482 Return false otherwise. We want to split such groups because:
2484 1) Small groups don't have much benefit and may interfer with
2485 general candidate selection.
2486 2) Size for problem with only small groups is usually small and
2487 general algorithm can handle it well.
2489 TODO -- Above claim may not hold when auto increment is supported. */
2492 split_all_small_groups (struct ivopts_data
*data
)
2494 bool split_p
= false;
2495 unsigned int i
, n
, distinct
;
2496 struct iv_use
*pre
, *use
;
2498 n
= n_iv_uses (data
);
2499 for (i
= 0; i
< n
; i
++)
2501 use
= iv_use (data
, i
);
/* Groups are only formed for address-type uses.  */
2506 gcc_assert (use
->type
== USE_ADDRESS
);
/* Walk the sub-use list, watching for offset changes.  */
2507 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2509 if (pre
->addr_offset
!= use
->addr_offset
)
/* NOTE(review): partial extraction — the `sub_id` reset, group-skip
   conditions, the list-break statements around line 2565, and braces are in
   dropped lines; comments only.
   Re-partitions each address-use group so that all members fit within the
   target's maximum [base + offset] displacement relative to the group's
   first use; overflowing tails become new top-level groups.  */
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by target's
2524 [base + offset] addressing mode. */
2527 group_address_uses (struct ivopts_data
*data
)
2529 HOST_WIDE_INT max_offset
= -1;
2530 unsigned int i
, n
, sub_id
;
2531 struct iv_use
*pre
, *use
;
2532 unsigned HOST_WIDE_INT addr_offset_first
;
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data
))
2538 n
= n_iv_uses (data
);
2539 for (i
= 0; i
< n
; i
++)
2541 use
= iv_use (data
, i
);
2545 gcc_assert (use
->type
== USE_ADDRESS
);
/* max_offset == 0 forces splitting; otherwise query the target.  */
2546 if (max_offset
!= 0)
2547 max_offset
= compute_max_addr_offset (use
);
2552 addr_offset_first
= use
->addr_offset
;
2553 /* Only uses with offset that can fit in offset part against
2554 the first use can be grouped together. */
2555 for (pre
= use
, use
= use
->next
;
2556 use
&& (use
->addr_offset
- addr_offset_first
2557 <= (unsigned HOST_WIDE_INT
) max_offset
);
2558 pre
= use
, use
= use
->next
)
2561 use
->sub_id
= ++sub_id
;
2564 /* Break the list and create new group. */
/* The overflowing remainder is promoted to a fresh group.  */
2568 use
->id
= n_iv_uses (data
);
2569 use
->related_cands
= BITMAP_ALLOC (NULL
);
2570 data
->iv_uses
.safe_push (use
);
2575 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2576 dump_uses (dump_file
, data
);
/* NOTE(review): partial extraction — the switch statement, many case labels
   (INTEGER_CST, MULT_EXPR, ARRAY_REF, COMPONENT_REF, ADDR_EXPR, MEM_REF —
   TODO confirm), returns of ORIG_EXPR, and the default-case tail are in
   dropped lines; comments only.
   Recursive worker that peels constant byte offsets off EXPR, accumulating
   them in *OFFSET and returning the stripped expression (converted back to
   EXPR's original type).  */
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
2584 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2585 HOST_WIDE_INT
*offset
)
2587 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2588 enum tree_code code
;
2589 tree type
, orig_type
= TREE_TYPE (expr
);
2590 HOST_WIDE_INT off0
, off1
, st
;
2591 tree orig_expr
= expr
;
2595 type
= TREE_TYPE (expr
);
2596 code
= TREE_CODE (expr
);
/* Constant case: the whole value becomes the offset, replaced by 0.  */
2602 if (!cst_and_fits_in_hwi (expr
)
2603 || integer_zerop (expr
))
2606 *offset
= int_cst_value (expr
);
2607 return build_int_cst (orig_type
, 0);
2609 case POINTER_PLUS_EXPR
:
/* Additive case: strip both operands and combine their offsets.  */
2612 op0
= TREE_OPERAND (expr
, 0);
2613 op1
= TREE_OPERAND (expr
, 1);
2615 op0
= strip_offset_1 (op0
, false, false, &off0
);
2616 op1
= strip_offset_1 (op1
, false, false, &off1
);
2618 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2619 if (op0
== TREE_OPERAND (expr
, 0)
2620 && op1
== TREE_OPERAND (expr
, 1))
/* Simplify away operands that reduced to zero.  */
2623 if (integer_zerop (op1
))
2625 else if (integer_zerop (op0
))
2627 if (code
== MINUS_EXPR
)
2628 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2633 expr
= fold_build2 (code
, type
, op0
, op1
);
2635 return fold_convert (orig_type
, expr
);
/* Multiplicative case: offset of the factor scales by the constant.  */
2638 op1
= TREE_OPERAND (expr
, 1);
2639 if (!cst_and_fits_in_hwi (op1
))
2642 op0
= TREE_OPERAND (expr
, 0);
2643 op0
= strip_offset_1 (op0
, false, false, &off0
);
2644 if (op0
== TREE_OPERAND (expr
, 0))
2647 *offset
= off0
* int_cst_value (op1
);
2648 if (integer_zerop (op0
))
2651 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2653 return fold_convert (orig_type
, expr
);
2656 case ARRAY_RANGE_REF
:
/* Array case: index offset scales by the constant element size.  */
2660 step
= array_ref_element_size (expr
);
2661 if (!cst_and_fits_in_hwi (step
))
2664 st
= int_cst_value (step
);
2665 op1
= TREE_OPERAND (expr
, 1);
2666 op1
= strip_offset_1 (op1
, false, false, &off1
);
2667 *offset
= off1
* st
;
2670 && integer_zerop (op1
))
2672 /* Strip the component reference completely. */
2673 op0
= TREE_OPERAND (expr
, 0);
2674 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
/* Component case: field offset folds into the accumulated offset.  */
2687 tmp
= component_ref_field_offset (expr
);
2688 field
= TREE_OPERAND (expr
, 1);
2690 && cst_and_fits_in_hwi (tmp
)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2693 HOST_WIDE_INT boffset
, abs_off
;
2695 /* Strip the component reference completely. */
2696 op0
= TREE_OPERAND (expr
, 0);
2697 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2698 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2699 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2703 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
/* Address case: strip inside the addressed object.  */
2710 op0
= TREE_OPERAND (expr
, 0);
2711 op0
= strip_offset_1 (op0
, true, true, &off0
);
2714 if (op0
== TREE_OPERAND (expr
, 0))
2717 expr
= build_fold_addr_expr (op0
);
2718 return fold_convert (orig_type
, expr
);
2721 /* ??? Offset operand? */
2722 inside_addr
= false;
2729 /* Default handling of expressions for that we want to recurse into
2730 the first operand. */
2731 op0
= TREE_OPERAND (expr
, 0);
2732 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2735 if (op0
== TREE_OPERAND (expr
, 0)
2736 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
/* Rebuild the node with the stripped operand(s).  */
2739 expr
= copy_node (expr
);
2740 TREE_OPERAND (expr
, 0) = op0
;
2742 TREE_OPERAND (expr
, 1) = op1
;
2744 /* Inside address, we might strip the top level component references,
2745 thus changing type of the expression. Handling of ADDR_EXPR
2747 expr
= fold_convert (orig_type
, expr
);
/* NOTE(review): partial extraction — the local `off`, the store of OFF into
   *OFFSET, and the return of CORE are in dropped lines; comments only.
   Public wrapper around strip_offset_1 with a signed intermediate offset.  */
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2755 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2758 tree core
= strip_offset_1 (expr
, false, false, &off
);
/* NOTE(review): partial extraction — the `return type;` for the
   already-unsigned case is in a dropped line; comments only.
   Maps TYPE to an unsigned type of the same precision so candidate iv
   arithmetic wraps instead of invoking undefined overflow.  */
2763 /* Returns variant of TYPE that can be used as base for different uses.
2764 We return unsigned type with the same precision, which avoids problems
2768 generic_type_for (tree type
)
/* Pointers become the corresponding unsigned integer type.  */
2770 if (POINTER_TYPE_P (type
))
2771 return unsigned_type_for (type
);
2773 if (TYPE_UNSIGNED (type
))
2776 return unsigned_type_for (type
);
/* NOTE(review): partial extraction — the NULL-returning exits and the guard
   allocating the bitmap only when *DEPENDS_ON is NULL are in dropped lines;
   comments only.
   walk_tree callback: for every relevant invariant SSA name found, sets its
   inv_id bit in the bitmap pointed to by DATA; fd_ivopts_data supplies the
   pass context since walk_tree has a single user pointer.  */
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap to that we should store it. */
2782 static struct ivopts_data
*fd_ivopts_data
;
2784 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2786 bitmap
*depends_on
= (bitmap
*) data
;
2787 struct version_info
*info
;
2789 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2791 info
= name_info (fd_ivopts_data
, *expr_p
);
/* Only pure invariants (with an id, no nonlinear use) are dependencies.  */
2793 if (!info
->inv_id
|| info
->has_nonlin_use
)
/* Lazily allocate the dependency bitmap.  */
2797 *depends_on
= BITMAP_ALLOC (NULL
);
2798 bitmap_set_bit (*depends_on
, info
->inv_id
);
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2808 static struct iv_cand
*
2809 add_candidate_1 (struct ivopts_data
*data
,
2810 tree base
, tree step
, bool important
, enum iv_position pos
,
2811 struct iv_use
*use
, gimple
*incremented_at
,
2812 struct iv
*orig_iv
= NULL
)
2815 struct iv_cand
*cand
= NULL
;
2816 tree type
, orig_type
;
2818 /* For non-original variables, make sure their values are computed in a type
2819 that does not invoke undefined behavior on overflows (since in general,
2820 we cannot prove that these induction variables are non-wrapping). */
2821 if (pos
!= IP_ORIGINAL
)
2823 orig_type
= TREE_TYPE (base
);
2824 type
= generic_type_for (orig_type
);
2825 if (type
!= orig_type
)
2827 base
= fold_convert (type
, base
);
2828 step
= fold_convert (type
, step
);
2832 for (i
= 0; i
< n_iv_cands (data
); i
++)
2834 cand
= iv_cand (data
, i
);
2836 if (cand
->pos
!= pos
)
2839 if (cand
->incremented_at
!= incremented_at
2840 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2841 && cand
->ainc_use
!= use
))
2855 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2856 && operand_equal_p (step
, cand
->iv
->step
, 0)
2857 && (TYPE_PRECISION (TREE_TYPE (base
))
2858 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2862 if (i
== n_iv_cands (data
))
2864 cand
= XCNEW (struct iv_cand
);
2870 cand
->iv
= alloc_iv (data
, base
, step
);
2873 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2875 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2876 cand
->var_after
= cand
->var_before
;
2878 cand
->important
= important
;
2879 cand
->incremented_at
= incremented_at
;
2880 data
->iv_candidates
.safe_push (cand
);
2883 && TREE_CODE (step
) != INTEGER_CST
)
2885 fd_ivopts_data
= data
;
2886 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2889 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2890 cand
->ainc_use
= use
;
2892 cand
->ainc_use
= NULL
;
2894 cand
->orig_iv
= orig_iv
;
2895 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2896 dump_cand (dump_file
, cand
);
2899 if (important
&& !cand
->important
)
2901 cand
->important
= true;
2902 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2903 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2908 bitmap_set_bit (use
->related_cands
, i
);
2909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2910 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2917 /* Returns true if incrementing the induction variable at the end of the LOOP
2920 The purpose is to avoid splitting latch edge with a biv increment, thus
2921 creating a jump, possibly confusing other optimization passes and leaving
2922 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2923 is not available (so we do not have a better alternative), or if the latch
2924 edge is already nonempty. */
2927 allow_ip_end_pos_p (struct loop
*loop
)
2929 if (!ip_normal_pos (loop
))
2932 if (!empty_block_p (ip_end_pos (loop
)))
2938 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2939 Important field is set to IMPORTANT. */
2942 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2943 bool important
, struct iv_use
*use
)
2945 basic_block use_bb
= gimple_bb (use
->stmt
);
2946 machine_mode mem_mode
;
2947 unsigned HOST_WIDE_INT cstepi
;
2949 /* If we insert the increment in any position other than the standard
2950 ones, we must ensure that it is incremented once per iteration.
2951 It must not be in an inner nested loop, or one side of an if
2953 if (use_bb
->loop_father
!= data
->current_loop
2954 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2955 || stmt_could_throw_p (use
->stmt
)
2956 || !cst_and_fits_in_hwi (step
))
2959 cstepi
= int_cst_value (step
);
2961 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2962 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2963 || USE_STORE_PRE_INCREMENT (mem_mode
))
2964 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2965 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2966 || USE_STORE_PRE_DECREMENT (mem_mode
))
2967 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2969 enum tree_code code
= MINUS_EXPR
;
2971 tree new_step
= step
;
2973 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2975 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2976 code
= POINTER_PLUS_EXPR
;
2979 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2980 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2981 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2984 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2985 || USE_STORE_POST_INCREMENT (mem_mode
))
2986 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2987 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2988 || USE_STORE_POST_DECREMENT (mem_mode
))
2989 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2991 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2996 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2997 position to POS. If USE is not NULL, the candidate is set as related to
2998 it. The candidate computation is scheduled before exit condition and at
3002 add_candidate (struct ivopts_data
*data
,
3003 tree base
, tree step
, bool important
, struct iv_use
*use
,
3004 struct iv
*orig_iv
= NULL
)
3006 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
3008 if (ip_normal_pos (data
->current_loop
))
3009 add_candidate_1 (data
, base
, step
, important
,
3010 IP_NORMAL
, use
, NULL
, orig_iv
);
3011 if (ip_end_pos (data
->current_loop
)
3012 && allow_ip_end_pos_p (data
->current_loop
))
3013 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
3016 /* Adds standard iv candidates. */
3019 add_standard_iv_candidates (struct ivopts_data
*data
)
3021 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
3023 /* The same for a double-integer type if it is still fast enough. */
3025 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
3026 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
3027 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
3028 build_int_cst (long_integer_type_node
, 1), true, NULL
);
3030 /* The same for a double-integer type if it is still fast enough. */
3032 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
3033 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
3034 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
3035 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
3039 /* Adds candidates bases on the old induction variable IV. */
3042 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3046 struct iv_cand
*cand
;
3048 /* Check if this biv is used in address type use. */
3049 if (iv
->no_overflow
&& iv
->have_address_use
3050 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3051 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3053 tree base
= fold_convert (sizetype
, iv
->base
);
3054 tree step
= fold_convert (sizetype
, iv
->step
);
3056 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3057 add_candidate (data
, base
, step
, true, NULL
, iv
);
3058 /* Add iv cand of the original type only if it has nonlinear use. */
3059 if (iv
->have_use_for
)
3060 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3063 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3065 /* The same, but with initial value zero. */
3066 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3067 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3069 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3070 iv
->step
, true, NULL
);
3072 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3073 if (gimple_code (phi
) == GIMPLE_PHI
)
3075 /* Additionally record the possibility of leaving the original iv
3077 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3078 /* Don't add candidate if it's from another PHI node because
3079 it's an affine iv appearing in the form of PEELED_CHREC. */
3080 phi
= SSA_NAME_DEF_STMT (def
);
3081 if (gimple_code (phi
) != GIMPLE_PHI
)
3083 cand
= add_candidate_1 (data
,
3084 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3085 SSA_NAME_DEF_STMT (def
));
3086 cand
->var_before
= iv
->ssa_name
;
3087 cand
->var_after
= def
;
3090 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3094 /* Adds candidates based on the old induction variables. */
3097 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3103 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3105 iv
= ver_info (data
, i
)->iv
;
3106 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3107 add_iv_candidate_for_biv (data
, iv
);
3111 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3114 record_common_cand (struct ivopts_data
*data
, tree base
,
3115 tree step
, struct iv_use
*use
)
3117 struct iv_common_cand ent
;
3118 struct iv_common_cand
**slot
;
3120 gcc_assert (use
!= NULL
);
3124 ent
.hash
= iterative_hash_expr (base
, 0);
3125 ent
.hash
= iterative_hash_expr (step
, ent
.hash
);
3127 slot
= data
->iv_common_cand_tab
->find_slot (&ent
, INSERT
);
3130 *slot
= new iv_common_cand ();
3131 (*slot
)->base
= base
;
3132 (*slot
)->step
= step
;
3133 (*slot
)->uses
.create (8);
3134 (*slot
)->hash
= ent
.hash
;
3135 data
->iv_common_cands
.safe_push ((*slot
));
3137 (*slot
)->uses
.safe_push (use
);
3141 /* Comparison function used to sort common candidates. */
3144 common_cand_cmp (const void *p1
, const void *p2
)
3147 const struct iv_common_cand
*const *const ccand1
3148 = (const struct iv_common_cand
*const *)p1
;
3149 const struct iv_common_cand
*const *const ccand2
3150 = (const struct iv_common_cand
*const *)p2
;
3152 n1
= (*ccand1
)->uses
.length ();
3153 n2
= (*ccand2
)->uses
.length ();
3157 /* Adds IV candidates based on common candidated recorded. */
3160 add_iv_candidate_derived_from_uses (struct ivopts_data
*data
)
3163 struct iv_cand
*cand_1
, *cand_2
;
3165 data
->iv_common_cands
.qsort (common_cand_cmp
);
3166 for (i
= 0; i
< data
->iv_common_cands
.length (); i
++)
3168 struct iv_common_cand
*ptr
= data
->iv_common_cands
[i
];
3170 /* Only add IV candidate if it's derived from multiple uses. */
3171 if (ptr
->uses
.length () <= 1)
3176 if (ip_normal_pos (data
->current_loop
))
3177 cand_1
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3178 false, IP_NORMAL
, NULL
, NULL
);
3180 if (ip_end_pos (data
->current_loop
)
3181 && allow_ip_end_pos_p (data
->current_loop
))
3182 cand_2
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3183 false, IP_END
, NULL
, NULL
);
3185 /* Bind deriving uses and the new candidates. */
3186 for (j
= 0; j
< ptr
->uses
.length (); j
++)
3188 struct iv_use
*use
= ptr
->uses
[j
];
3190 bitmap_set_bit (use
->related_cands
, cand_1
->id
);
3192 bitmap_set_bit (use
->related_cands
, cand_2
->id
);
3196 /* Release data since it is useless from this point. */
3197 data
->iv_common_cand_tab
->empty ();
3198 data
->iv_common_cands
.truncate (0);
3201 /* Adds candidates based on the value of USE's iv. */
3204 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3206 unsigned HOST_WIDE_INT offset
;
3209 struct iv
*iv
= use
->iv
;
3211 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3213 /* Record common candidate for use in case it can be shared by others. */
3214 record_common_cand (data
, iv
->base
, iv
->step
, use
);
3216 /* Record common candidate with initial value zero. */
3217 basetype
= TREE_TYPE (iv
->base
);
3218 if (POINTER_TYPE_P (basetype
))
3219 basetype
= sizetype
;
3220 record_common_cand (data
, build_int_cst (basetype
, 0), iv
->step
, use
);
3222 /* Record common candidate with constant offset stripped in base. */
3224 base
= strip_offset (iv
->base
, &offset
);
3225 if (offset
|| base
!= iv
->base
)
3226 record_common_cand (data
, base
, iv
->step
, use
);
3229 /* Record common candidate with base_object removed in base. */
3230 if (iv
->base_object
!= NULL
)
3234 tree step
, base_object
= iv
->base_object
;
3240 STRIP_NOPS (base_object
);
3241 tree_to_aff_combination (base
, TREE_TYPE (base
), &aff_base
);
3242 for (i
= 0; i
< aff_base
.n
; i
++)
3244 if (aff_base
.elts
[i
].coef
!= 1)
3247 if (operand_equal_p (aff_base
.elts
[i
].val
, base_object
, 0))
3252 aff_combination_remove_elt (&aff_base
, i
);
3253 base
= aff_combination_to_tree (&aff_base
);
3254 basetype
= TREE_TYPE (base
);
3255 if (POINTER_TYPE_P (basetype
))
3256 basetype
= sizetype
;
3258 step
= fold_convert (basetype
, step
);
3259 record_common_cand (data
, base
, step
, use
);
3260 /* Also record common candidate with offset stripped. */
3261 base
= strip_offset (base
, &offset
);
3263 record_common_cand (data
, base
, step
, use
);
3267 /* At last, add auto-incremental candidates. Make such variables
3268 important since other iv uses with same base object may be based
3270 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
3271 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3274 /* Adds candidates based on the uses. */
3277 add_iv_candidate_for_uses (struct ivopts_data
*data
)
3281 for (i
= 0; i
< n_iv_uses (data
); i
++)
3283 struct iv_use
*use
= iv_use (data
, i
);
3290 case USE_NONLINEAR_EXPR
:
3293 /* Just add the ivs based on the value of the iv used here. */
3294 add_iv_candidate_for_use (data
, use
);
3301 add_iv_candidate_derived_from_uses (data
);
3304 /* Record important candidates and add them to related_cands bitmaps. */
3307 record_important_candidates (struct ivopts_data
*data
)
3312 for (i
= 0; i
< n_iv_cands (data
); i
++)
3314 struct iv_cand
*cand
= iv_cand (data
, i
);
3316 if (cand
->important
)
3317 bitmap_set_bit (data
->important_candidates
, i
);
3320 data
->consider_all_candidates
= (n_iv_cands (data
)
3321 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3323 /* Add important candidates to uses' related_cands bitmaps. */
3324 for (i
= 0; i
< n_iv_uses (data
); i
++)
3326 use
= iv_use (data
, i
);
3327 bitmap_ior_into (use
->related_cands
, data
->important_candidates
);
3331 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3332 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3333 we allocate a simple list to every use. */
3336 alloc_use_cost_map (struct ivopts_data
*data
)
3338 unsigned i
, size
, s
;
3340 for (i
= 0; i
< n_iv_uses (data
); i
++)
3342 struct iv_use
*use
= iv_use (data
, i
);
3344 if (data
->consider_all_candidates
)
3345 size
= n_iv_cands (data
);
3348 s
= bitmap_count_bits (use
->related_cands
);
3350 /* Round up to the power of two, so that moduling by it is fast. */
3351 size
= s
? (1 << ceil_log2 (s
)) : 1;
3354 use
->n_map_members
= size
;
3355 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3359 /* Returns description of computation cost of expression whose runtime
3360 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3363 new_cost (unsigned runtime
, unsigned complexity
)
3367 cost
.cost
= runtime
;
3368 cost
.complexity
= complexity
;
3373 /* Returns true if COST is infinite. */
3376 infinite_cost_p (comp_cost cost
)
3378 return cost
.cost
== INFTY
;
3381 /* Adds costs COST1 and COST2. */
3384 add_costs (comp_cost cost1
, comp_cost cost2
)
3386 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3387 return infinite_cost
;
3389 cost1
.cost
+= cost2
.cost
;
3390 cost1
.complexity
+= cost2
.complexity
;
3394 /* Subtracts costs COST1 and COST2. */
3397 sub_costs (comp_cost cost1
, comp_cost cost2
)
3399 cost1
.cost
-= cost2
.cost
;
3400 cost1
.complexity
-= cost2
.complexity
;
3405 /* Returns a negative number if COST1 < COST2, a positive number if
3406 COST1 > COST2, and 0 if COST1 = COST2. */
3409 compare_costs (comp_cost cost1
, comp_cost cost2
)
3411 if (cost1
.cost
== cost2
.cost
)
3412 return cost1
.complexity
- cost2
.complexity
;
3414 return cost1
.cost
- cost2
.cost
;
3417 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3418 on invariants DEPENDS_ON and that the value used in expressing it
3419 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3422 set_use_iv_cost (struct ivopts_data
*data
,
3423 struct iv_use
*use
, struct iv_cand
*cand
,
3424 comp_cost cost
, bitmap depends_on
, tree value
,
3425 enum tree_code comp
, int inv_expr_id
)
3429 if (infinite_cost_p (cost
))
3431 BITMAP_FREE (depends_on
);
3435 if (data
->consider_all_candidates
)
3437 use
->cost_map
[cand
->id
].cand
= cand
;
3438 use
->cost_map
[cand
->id
].cost
= cost
;
3439 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3440 use
->cost_map
[cand
->id
].value
= value
;
3441 use
->cost_map
[cand
->id
].comp
= comp
;
3442 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3446 /* n_map_members is a power of two, so this computes modulo. */
3447 s
= cand
->id
& (use
->n_map_members
- 1);
3448 for (i
= s
; i
< use
->n_map_members
; i
++)
3449 if (!use
->cost_map
[i
].cand
)
3451 for (i
= 0; i
< s
; i
++)
3452 if (!use
->cost_map
[i
].cand
)
3458 use
->cost_map
[i
].cand
= cand
;
3459 use
->cost_map
[i
].cost
= cost
;
3460 use
->cost_map
[i
].depends_on
= depends_on
;
3461 use
->cost_map
[i
].value
= value
;
3462 use
->cost_map
[i
].comp
= comp
;
3463 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3466 /* Gets cost of (USE, CANDIDATE) pair. */
3468 static struct cost_pair
*
3469 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3470 struct iv_cand
*cand
)
3473 struct cost_pair
*ret
;
3478 if (data
->consider_all_candidates
)
3480 ret
= use
->cost_map
+ cand
->id
;
3487 /* n_map_members is a power of two, so this computes modulo. */
3488 s
= cand
->id
& (use
->n_map_members
- 1);
3489 for (i
= s
; i
< use
->n_map_members
; i
++)
3490 if (use
->cost_map
[i
].cand
== cand
)
3491 return use
->cost_map
+ i
;
3492 else if (use
->cost_map
[i
].cand
== NULL
)
3494 for (i
= 0; i
< s
; i
++)
3495 if (use
->cost_map
[i
].cand
== cand
)
3496 return use
->cost_map
+ i
;
3497 else if (use
->cost_map
[i
].cand
== NULL
)
3503 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3505 produce_memory_decl_rtl (tree obj
, int *regno
)
3507 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3508 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3512 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3514 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3515 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3516 SET_SYMBOL_REF_DECL (x
, obj
);
3517 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3518 set_mem_addr_space (x
, as
);
3519 targetm
.encode_section_info (obj
, x
, true);
3523 x
= gen_raw_REG (address_mode
, (*regno
)++);
3524 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3525 set_mem_addr_space (x
, as
);
3531 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3532 walk_tree. DATA contains the actual fake register number. */
3535 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3537 tree obj
= NULL_TREE
;
3539 int *regno
= (int *) data
;
3541 switch (TREE_CODE (*expr_p
))
3544 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3545 handled_component_p (*expr_p
);
3546 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3549 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3550 x
= produce_memory_decl_rtl (obj
, regno
);
3555 obj
= SSA_NAME_VAR (*expr_p
);
3556 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3559 if (!DECL_RTL_SET_P (obj
))
3560 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3569 if (DECL_RTL_SET_P (obj
))
3572 if (DECL_MODE (obj
) == BLKmode
)
3573 x
= produce_memory_decl_rtl (obj
, regno
);
3575 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3585 decl_rtl_to_reset
.safe_push (obj
);
3586 SET_DECL_RTL (obj
, x
);
3592 /* Determines cost of the computation of EXPR. */
3595 computation_cost (tree expr
, bool speed
)
3599 tree type
= TREE_TYPE (expr
);
3601 /* Avoid using hard regs in ways which may be unsupported. */
3602 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3603 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3604 enum node_frequency real_frequency
= node
->frequency
;
3606 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3607 crtl
->maybe_hot_insn_p
= speed
;
3608 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3610 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3613 default_rtl_profile ();
3614 node
->frequency
= real_frequency
;
3616 cost
= seq_cost (seq
, speed
);
3618 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3619 TYPE_ADDR_SPACE (type
), speed
);
3620 else if (!REG_P (rslt
))
3621 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3626 /* Returns variable containing the value of candidate CAND at statement AT. */
3629 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3631 if (stmt_after_increment (loop
, cand
, stmt
))
3632 return cand
->var_after
;
3634 return cand
->var_before
;
3637 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3638 same precision that is at least as wide as the precision of TYPE, stores
3639 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3643 determine_common_wider_type (tree
*a
, tree
*b
)
3645 tree wider_type
= NULL
;
3647 tree atype
= TREE_TYPE (*a
);
3649 if (CONVERT_EXPR_P (*a
))
3651 suba
= TREE_OPERAND (*a
, 0);
3652 wider_type
= TREE_TYPE (suba
);
3653 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3659 if (CONVERT_EXPR_P (*b
))
3661 subb
= TREE_OPERAND (*b
, 0);
3662 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3673 /* Determines the expression by that USE is expressed from induction variable
3674 CAND at statement AT in LOOP. The expression is stored in a decomposed
3675 form into AFF. Returns false if USE cannot be expressed using CAND. */
3678 get_computation_aff (struct loop
*loop
,
3679 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
,
3680 struct aff_tree
*aff
)
3682 tree ubase
= use
->iv
->base
;
3683 tree ustep
= use
->iv
->step
;
3684 tree cbase
= cand
->iv
->base
;
3685 tree cstep
= cand
->iv
->step
, cstep_common
;
3686 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3687 tree common_type
, var
;
3689 aff_tree cbase_aff
, var_aff
;
3692 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3694 /* We do not have a precision to express the values of use. */
3698 var
= var_at_stmt (loop
, cand
, at
);
3699 uutype
= unsigned_type_for (utype
);
3701 /* If the conversion is not noop, perform it. */
3702 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3704 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3705 && (CONVERT_EXPR_P (cstep
) || TREE_CODE (cstep
) == INTEGER_CST
))
3707 tree inner_base
, inner_step
, inner_type
;
3708 inner_base
= TREE_OPERAND (cbase
, 0);
3709 if (CONVERT_EXPR_P (cstep
))
3710 inner_step
= TREE_OPERAND (cstep
, 0);
3714 inner_type
= TREE_TYPE (inner_base
);
3715 /* If candidate is added from a biv whose type is smaller than
3716 ctype, we know both candidate and the biv won't overflow.
3717 In this case, it's safe to skip the convertion in candidate.
3718 As an example, (unsigned short)((unsigned long)A) equals to
3719 (unsigned short)A, if A has a type no larger than short. */
3720 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3726 cstep
= fold_convert (uutype
, cstep
);
3727 cbase
= fold_convert (uutype
, cbase
);
3728 var
= fold_convert (uutype
, var
);
3731 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3734 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3735 type, we achieve better folding by computing their difference in this
3736 wider type, and cast the result to UUTYPE. We do not need to worry about
3737 overflows, as all the arithmetics will in the end be performed in UUTYPE
3739 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3741 /* use = ubase - ratio * cbase + ratio * var. */
3742 tree_to_aff_combination (ubase
, common_type
, aff
);
3743 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3744 tree_to_aff_combination (var
, uutype
, &var_aff
);
3746 /* We need to shift the value if we are after the increment. */
3747 if (stmt_after_increment (loop
, cand
, at
))
3751 if (common_type
!= uutype
)
3752 cstep_common
= fold_convert (common_type
, cstep
);
3754 cstep_common
= cstep
;
3756 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3757 aff_combination_add (&cbase_aff
, &cstep_aff
);
3760 aff_combination_scale (&cbase_aff
, -rat
);
3761 aff_combination_add (aff
, &cbase_aff
);
3762 if (common_type
!= uutype
)
3763 aff_combination_convert (aff
, uutype
);
3765 aff_combination_scale (&var_aff
, rat
);
3766 aff_combination_add (aff
, &var_aff
);
3771 /* Return the type of USE. */
3774 get_use_type (struct iv_use
*use
)
3776 tree base_type
= TREE_TYPE (use
->iv
->base
);
3779 if (use
->type
== USE_ADDRESS
)
3781 /* The base_type may be a void pointer. Create a pointer type based on
3782 the mem_ref instead. */
3783 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3784 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3785 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3793 /* Determines the expression by that USE is expressed from induction variable
3794 CAND at statement AT in LOOP. The computation is unshared. */
3797 get_computation_at (struct loop
*loop
,
3798 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
)
3801 tree type
= get_use_type (use
);
3803 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3805 unshare_aff_combination (&aff
);
3806 return fold_convert (type
, aff_combination_to_tree (&aff
));
3809 /* Determines the expression by that USE is expressed from induction variable
3810 CAND in LOOP. The computation is unshared. */
3813 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3815 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3818 /* Adjust the cost COST for being in loop setup rather than loop body.
3819 If we're optimizing for space, the loop setup overhead is constant;
3820 if we're optimizing for speed, amortize it over the per-iteration cost. */
3822 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3826 else if (optimize_loop_for_speed_p (data
->current_loop
))
3827 return cost
/ avg_loop_niter (data
->current_loop
);
3832 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3833 validity for a memory reference accessing memory of mode MODE in
3834 address space AS. */
3838 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3841 #define MAX_RATIO 128
3842 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3843 static vec
<sbitmap
> valid_mult_list
;
3846 if (data_index
>= valid_mult_list
.length ())
3847 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3849 valid_mult
= valid_mult_list
[data_index
];
3852 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3853 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3854 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3858 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3859 bitmap_clear (valid_mult
);
3860 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3861 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3862 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3864 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3865 if (memory_address_addr_space_p (mode
, addr
, as
)
3866 || memory_address_addr_space_p (mode
, scaled
, as
))
3867 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3870 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3872 fprintf (dump_file
, " allowed multipliers:");
3873 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3874 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3875 fprintf (dump_file
, " %d", (int) i
);
3876 fprintf (dump_file
, "\n");
3877 fprintf (dump_file
, "\n");
3880 valid_mult_list
[data_index
] = valid_mult
;
3883 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3886 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3889 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3890 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3891 variable is omitted. Compute the cost for a memory reference that accesses
3892 a memory location of mode MEM_MODE in address space AS.
3894 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3895 size of MEM_MODE / RATIO) is available. To make this determination, we
3896 look at the size of the increment to be made, which is given in CSTEP.
3897 CSTEP may be zero if the step is unknown.
3898 STMT_AFTER_INC is true iff the statement we're looking at is after the
3899 increment of the original biv.
3901 TODO -- there must be some better way. This all is quite crude. */
3905 AINC_PRE_INC
, /* Pre increment. */
3906 AINC_PRE_DEC
, /* Pre decrement. */
3907 AINC_POST_INC
, /* Post increment. */
3908 AINC_POST_DEC
, /* Post decrement. */
3909 AINC_NONE
/* Also the number of auto increment types. */
3912 struct address_cost_data
3914 HOST_WIDE_INT min_offset
, max_offset
;
3915 unsigned costs
[2][2][2][2];
3916 unsigned ainc_costs
[AINC_NONE
];
3921 get_address_cost (bool symbol_present
, bool var_present
,
3922 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3923 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3924 addr_space_t as
, bool speed
,
3925 bool stmt_after_inc
, bool *may_autoinc
)
3927 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3928 static vec
<address_cost_data
*> address_cost_data_list
;
3929 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3930 address_cost_data
*data
;
3931 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3932 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3933 unsigned cost
, acost
, complexity
;
3934 enum ainc_type autoinc_type
;
3935 bool offset_p
, ratio_p
, autoinc
;
3936 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3937 unsigned HOST_WIDE_INT mask
;
3940 if (data_index
>= address_cost_data_list
.length ())
3941 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3943 data
= address_cost_data_list
[data_index
];
3947 HOST_WIDE_INT rat
, off
= 0;
3948 int old_cse_not_expected
, width
;
3949 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3954 data
= (address_cost_data
*) xcalloc (1, sizeof (*data
));
3956 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3958 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3959 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3960 width
= HOST_BITS_PER_WIDE_INT
- 1;
3961 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3963 for (i
= width
; i
>= 0; i
--)
3965 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3966 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3967 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3970 data
->min_offset
= (i
== -1? 0 : off
);
3972 for (i
= width
; i
>= 0; i
--)
3974 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3975 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3976 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3978 /* For some strict-alignment targets, the offset must be naturally
3979 aligned. Try an aligned offset if mem_mode is not QImode. */
3980 off
= mem_mode
!= QImode
3981 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3982 - GET_MODE_SIZE (mem_mode
)
3986 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3987 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3993 data
->max_offset
= off
;
3995 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3997 fprintf (dump_file
, "get_address_cost:\n");
3998 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3999 GET_MODE_NAME (mem_mode
),
4001 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
4002 GET_MODE_NAME (mem_mode
),
4007 for (i
= 2; i
<= MAX_RATIO
; i
++)
4008 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
4014 /* Compute the cost of various addressing modes. */
4016 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
4017 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
4019 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
4020 || USE_STORE_PRE_DECREMENT (mem_mode
))
4022 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
4023 has_predec
[mem_mode
]
4024 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4026 if (has_predec
[mem_mode
])
4027 data
->ainc_costs
[AINC_PRE_DEC
]
4028 = address_cost (addr
, mem_mode
, as
, speed
);
4030 if (USE_LOAD_POST_DECREMENT (mem_mode
)
4031 || USE_STORE_POST_DECREMENT (mem_mode
))
4033 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
4034 has_postdec
[mem_mode
]
4035 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4037 if (has_postdec
[mem_mode
])
4038 data
->ainc_costs
[AINC_POST_DEC
]
4039 = address_cost (addr
, mem_mode
, as
, speed
);
4041 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
4042 || USE_STORE_PRE_DECREMENT (mem_mode
))
4044 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
4045 has_preinc
[mem_mode
]
4046 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4048 if (has_preinc
[mem_mode
])
4049 data
->ainc_costs
[AINC_PRE_INC
]
4050 = address_cost (addr
, mem_mode
, as
, speed
);
4052 if (USE_LOAD_POST_INCREMENT (mem_mode
)
4053 || USE_STORE_POST_INCREMENT (mem_mode
))
4055 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
4056 has_postinc
[mem_mode
]
4057 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4059 if (has_postinc
[mem_mode
])
4060 data
->ainc_costs
[AINC_POST_INC
]
4061 = address_cost (addr
, mem_mode
, as
, speed
);
4063 for (i
= 0; i
< 16; i
++)
4066 var_p
= (i
>> 1) & 1;
4067 off_p
= (i
>> 2) & 1;
4068 rat_p
= (i
>> 3) & 1;
4072 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
4073 gen_int_mode (rat
, address_mode
));
4076 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
4080 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
4081 /* ??? We can run into trouble with some backends by presenting
4082 it with symbols which haven't been properly passed through
4083 targetm.encode_section_info. By setting the local bit, we
4084 enhance the probability of things working. */
4085 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
4088 base
= gen_rtx_fmt_e (CONST
, address_mode
,
4090 (PLUS
, address_mode
, base
,
4091 gen_int_mode (off
, address_mode
)));
4094 base
= gen_int_mode (off
, address_mode
);
4099 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
4102 /* To avoid splitting addressing modes, pretend that no cse will
4104 old_cse_not_expected
= cse_not_expected
;
4105 cse_not_expected
= true;
4106 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
4107 cse_not_expected
= old_cse_not_expected
;
4111 acost
= seq_cost (seq
, speed
);
4112 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
4116 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
4119 /* On some targets, it is quite expensive to load symbol to a register,
4120 which makes addresses that contain symbols look much more expensive.
4121 However, the symbol will have to be loaded in any case before the
4122 loop (and quite likely we have it in register already), so it does not
4123 make much sense to penalize them too heavily. So make some final
4124 tweaks for the SYMBOL_PRESENT modes:
4126 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4127 var is cheaper, use this mode with small penalty.
4128 If VAR_PRESENT is true, try whether the mode with
4129 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4130 if this is the case, use it. */
4131 add_c
= add_cost (speed
, address_mode
);
4132 for (i
= 0; i
< 8; i
++)
4135 off_p
= (i
>> 1) & 1;
4136 rat_p
= (i
>> 2) & 1;
4138 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
4142 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
4143 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
4146 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4148 fprintf (dump_file
, "Address costs:\n");
4150 for (i
= 0; i
< 16; i
++)
4153 var_p
= (i
>> 1) & 1;
4154 off_p
= (i
>> 2) & 1;
4155 rat_p
= (i
>> 3) & 1;
4157 fprintf (dump_file
, " ");
4159 fprintf (dump_file
, "sym + ");
4161 fprintf (dump_file
, "var + ");
4163 fprintf (dump_file
, "cst + ");
4165 fprintf (dump_file
, "rat * ");
4167 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
4168 fprintf (dump_file
, "index costs %d\n", acost
);
4170 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
4171 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
4172 fprintf (dump_file
, " May include autoinc/dec\n");
4173 fprintf (dump_file
, "\n");
4176 address_cost_data_list
[data_index
] = data
;
4179 bits
= GET_MODE_BITSIZE (address_mode
);
4180 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
4182 if ((offset
>> (bits
- 1) & 1))
4187 autoinc_type
= AINC_NONE
;
4188 msize
= GET_MODE_SIZE (mem_mode
);
4189 autoinc_offset
= offset
;
4191 autoinc_offset
+= ratio
* cstep
;
4192 if (symbol_present
|| var_present
|| ratio
!= 1)
4196 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
4198 autoinc_type
= AINC_POST_INC
;
4199 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
4201 autoinc_type
= AINC_POST_DEC
;
4202 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
4204 autoinc_type
= AINC_PRE_INC
;
4205 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
4207 autoinc_type
= AINC_PRE_DEC
;
4209 if (autoinc_type
!= AINC_NONE
)
4214 offset_p
= (s_offset
!= 0
4215 && data
->min_offset
<= s_offset
4216 && s_offset
<= data
->max_offset
);
4217 ratio_p
= (ratio
!= 1
4218 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
4220 if (ratio
!= 1 && !ratio_p
)
4221 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
4223 if (s_offset
&& !offset_p
&& !symbol_present
)
4224 cost
+= add_cost (speed
, address_mode
);
4227 *may_autoinc
= autoinc
;
4229 acost
= data
->ainc_costs
[autoinc_type
];
4231 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
4232 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
4233 return new_cost (cost
+ acost
, complexity
);
4236 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4237 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4238 calculating the operands of EXPR. Returns true if successful, and returns
4239 the cost in COST. */
4242 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
4243 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4246 tree op1
= TREE_OPERAND (expr
, 1);
4247 tree cst
= TREE_OPERAND (mult
, 1);
4248 tree multop
= TREE_OPERAND (mult
, 0);
4249 int m
= exact_log2 (int_cst_value (cst
));
4250 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4251 int as_cost
, sa_cost
;
4254 if (!(m
>= 0 && m
< maxm
))
4258 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4260 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4262 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4263 use that in preference to a shift insn followed by an add insn. */
4264 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4265 ? shiftadd_cost (speed
, mode
, m
)
4267 ? shiftsub1_cost (speed
, mode
, m
)
4268 : shiftsub0_cost (speed
, mode
, m
)));
4270 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
4271 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
4273 STRIP_NOPS (multop
);
4274 if (!is_gimple_val (multop
))
4275 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
4281 /* Estimates cost of forcing expression EXPR into a variable. */
4284 force_expr_to_var_cost (tree expr
, bool speed
)
4286 static bool costs_initialized
= false;
4287 static unsigned integer_cost
[2];
4288 static unsigned symbol_cost
[2];
4289 static unsigned address_cost
[2];
4291 comp_cost cost0
, cost1
, cost
;
4294 if (!costs_initialized
)
4296 tree type
= build_pointer_type (integer_type_node
);
4301 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4302 TREE_STATIC (var
) = 1;
4303 x
= produce_memory_decl_rtl (var
, NULL
);
4304 SET_DECL_RTL (var
, x
);
4306 addr
= build1 (ADDR_EXPR
, type
, var
);
4309 for (i
= 0; i
< 2; i
++)
4311 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4314 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4317 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4318 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4320 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4321 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4322 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4323 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4324 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4325 fprintf (dump_file
, "\n");
4329 costs_initialized
= true;
4334 if (SSA_VAR_P (expr
))
4337 if (is_gimple_min_invariant (expr
))
4339 if (TREE_CODE (expr
) == INTEGER_CST
)
4340 return new_cost (integer_cost
[speed
], 0);
4342 if (TREE_CODE (expr
) == ADDR_EXPR
)
4344 tree obj
= TREE_OPERAND (expr
, 0);
4346 if (TREE_CODE (obj
) == VAR_DECL
4347 || TREE_CODE (obj
) == PARM_DECL
4348 || TREE_CODE (obj
) == RESULT_DECL
)
4349 return new_cost (symbol_cost
[speed
], 0);
4352 return new_cost (address_cost
[speed
], 0);
4355 switch (TREE_CODE (expr
))
4357 case POINTER_PLUS_EXPR
:
4361 op0
= TREE_OPERAND (expr
, 0);
4362 op1
= TREE_OPERAND (expr
, 1);
4369 op0
= TREE_OPERAND (expr
, 0);
4375 /* Just an arbitrary value, FIXME. */
4376 return new_cost (target_spill_cost
[speed
], 0);
4379 if (op0
== NULL_TREE
4380 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4383 cost0
= force_expr_to_var_cost (op0
, speed
);
4385 if (op1
== NULL_TREE
4386 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4389 cost1
= force_expr_to_var_cost (op1
, speed
);
4391 mode
= TYPE_MODE (TREE_TYPE (expr
));
4392 switch (TREE_CODE (expr
))
4394 case POINTER_PLUS_EXPR
:
4398 cost
= new_cost (add_cost (speed
, mode
), 0);
4399 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4401 tree mult
= NULL_TREE
;
4403 if (TREE_CODE (op1
) == MULT_EXPR
)
4405 else if (TREE_CODE (op0
) == MULT_EXPR
)
4408 if (mult
!= NULL_TREE
4409 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4410 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4418 tree inner_mode
, outer_mode
;
4419 outer_mode
= TREE_TYPE (expr
);
4420 inner_mode
= TREE_TYPE (op0
);
4421 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4422 TYPE_MODE (inner_mode
), speed
), 0);
4427 if (cst_and_fits_in_hwi (op0
))
4428 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4430 else if (cst_and_fits_in_hwi (op1
))
4431 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4434 return new_cost (target_spill_cost
[speed
], 0);
4441 cost
= add_costs (cost
, cost0
);
4442 cost
= add_costs (cost
, cost1
);
4444 /* Bound the cost by target_spill_cost. The parts of complicated
4445 computations often are either loop invariant or at least can
4446 be shared between several iv uses, so letting this grow without
4447 limits would not give reasonable results. */
4448 if (cost
.cost
> (int) target_spill_cost
[speed
])
4449 cost
.cost
= target_spill_cost
[speed
];
4454 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4455 invariants the computation depends on. */
4458 force_var_cost (struct ivopts_data
*data
,
4459 tree expr
, bitmap
*depends_on
)
4463 fd_ivopts_data
= data
;
4464 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4467 return force_expr_to_var_cost (expr
, data
->speed
);
4470 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4471 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4472 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4473 invariants the computation depends on. */
4476 split_address_cost (struct ivopts_data
*data
,
4477 tree addr
, bool *symbol_present
, bool *var_present
,
4478 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4481 HOST_WIDE_INT bitsize
;
4482 HOST_WIDE_INT bitpos
;
4485 int unsignedp
, reversep
, volatilep
;
4487 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4488 &unsignedp
, &reversep
, &volatilep
, false);
4491 || bitpos
% BITS_PER_UNIT
!= 0
4493 || TREE_CODE (core
) != VAR_DECL
)
4495 *symbol_present
= false;
4496 *var_present
= true;
4497 fd_ivopts_data
= data
;
4499 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4501 return new_cost (target_spill_cost
[data
->speed
], 0);
4504 *offset
+= bitpos
/ BITS_PER_UNIT
;
4505 if (TREE_STATIC (core
)
4506 || DECL_EXTERNAL (core
))
4508 *symbol_present
= true;
4509 *var_present
= false;
4513 *symbol_present
= false;
4514 *var_present
= true;
4518 /* Estimates cost of expressing difference of addresses E1 - E2 as
4519 var + symbol + offset. The value of offset is added to OFFSET,
4520 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4521 part is missing. DEPENDS_ON is a set of the invariants the computation
4525 ptr_difference_cost (struct ivopts_data
*data
,
4526 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4527 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4529 HOST_WIDE_INT diff
= 0;
4530 aff_tree aff_e1
, aff_e2
;
4533 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4535 if (ptr_difference_const (e1
, e2
, &diff
))
4538 *symbol_present
= false;
4539 *var_present
= false;
4543 if (integer_zerop (e2
))
4544 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4545 symbol_present
, var_present
, offset
, depends_on
);
4547 *symbol_present
= false;
4548 *var_present
= true;
4550 type
= signed_type_for (TREE_TYPE (e1
));
4551 tree_to_aff_combination (e1
, type
, &aff_e1
);
4552 tree_to_aff_combination (e2
, type
, &aff_e2
);
4553 aff_combination_scale (&aff_e2
, -1);
4554 aff_combination_add (&aff_e1
, &aff_e2
);
4556 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4559 /* Estimates cost of expressing difference E1 - E2 as
4560 var + symbol + offset. The value of offset is added to OFFSET,
4561 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4562 part is missing. DEPENDS_ON is a set of the invariants the computation
4566 difference_cost (struct ivopts_data
*data
,
4567 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4568 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4570 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4571 unsigned HOST_WIDE_INT off1
, off2
;
4572 aff_tree aff_e1
, aff_e2
;
4575 e1
= strip_offset (e1
, &off1
);
4576 e2
= strip_offset (e2
, &off2
);
4577 *offset
+= off1
- off2
;
4582 if (TREE_CODE (e1
) == ADDR_EXPR
)
4583 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4584 offset
, depends_on
);
4585 *symbol_present
= false;
4587 if (operand_equal_p (e1
, e2
, 0))
4589 *var_present
= false;
4593 *var_present
= true;
4595 if (integer_zerop (e2
))
4596 return force_var_cost (data
, e1
, depends_on
);
4598 if (integer_zerop (e1
))
4600 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4601 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4605 type
= signed_type_for (TREE_TYPE (e1
));
4606 tree_to_aff_combination (e1
, type
, &aff_e1
);
4607 tree_to_aff_combination (e2
, type
, &aff_e2
);
4608 aff_combination_scale (&aff_e2
, -1);
4609 aff_combination_add (&aff_e1
, &aff_e2
);
4611 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4614 /* Returns true if AFF1 and AFF2 are identical. */
4617 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4621 if (aff1
->n
!= aff2
->n
)
4624 for (i
= 0; i
< aff1
->n
; i
++)
4626 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4629 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4635 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4638 get_expr_id (struct ivopts_data
*data
, tree expr
)
4640 struct iv_inv_expr_ent ent
;
4641 struct iv_inv_expr_ent
**slot
;
4644 ent
.hash
= iterative_hash_expr (expr
, 0);
4645 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4649 *slot
= XNEW (struct iv_inv_expr_ent
);
4650 (*slot
)->expr
= expr
;
4651 (*slot
)->hash
= ent
.hash
;
4652 (*slot
)->id
= data
->inv_expr_id
++;
4656 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4657 requires a new compiler generated temporary. Returns -1 otherwise.
4658 ADDRESS_P is a flag indicating if the expression is for address
4662 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4663 tree cbase
, HOST_WIDE_INT ratio
,
4666 aff_tree ubase_aff
, cbase_aff
;
4674 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4675 && (TREE_CODE (cbase
) == INTEGER_CST
))
4678 /* Strips the constant part. */
4679 if (TREE_CODE (ubase
) == PLUS_EXPR
4680 || TREE_CODE (ubase
) == MINUS_EXPR
4681 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4683 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4684 ubase
= TREE_OPERAND (ubase
, 0);
4687 /* Strips the constant part. */
4688 if (TREE_CODE (cbase
) == PLUS_EXPR
4689 || TREE_CODE (cbase
) == MINUS_EXPR
4690 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4692 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4693 cbase
= TREE_OPERAND (cbase
, 0);
4698 if (((TREE_CODE (ubase
) == SSA_NAME
)
4699 || (TREE_CODE (ubase
) == ADDR_EXPR
4700 && is_gimple_min_invariant (ubase
)))
4701 && (TREE_CODE (cbase
) == INTEGER_CST
))
4704 if (((TREE_CODE (cbase
) == SSA_NAME
)
4705 || (TREE_CODE (cbase
) == ADDR_EXPR
4706 && is_gimple_min_invariant (cbase
)))
4707 && (TREE_CODE (ubase
) == INTEGER_CST
))
4713 if (operand_equal_p (ubase
, cbase
, 0))
4716 if (TREE_CODE (ubase
) == ADDR_EXPR
4717 && TREE_CODE (cbase
) == ADDR_EXPR
)
4721 usym
= TREE_OPERAND (ubase
, 0);
4722 csym
= TREE_OPERAND (cbase
, 0);
4723 if (TREE_CODE (usym
) == ARRAY_REF
)
4725 tree ind
= TREE_OPERAND (usym
, 1);
4726 if (TREE_CODE (ind
) == INTEGER_CST
4727 && tree_fits_shwi_p (ind
)
4728 && tree_to_shwi (ind
) == 0)
4729 usym
= TREE_OPERAND (usym
, 0);
4731 if (TREE_CODE (csym
) == ARRAY_REF
)
4733 tree ind
= TREE_OPERAND (csym
, 1);
4734 if (TREE_CODE (ind
) == INTEGER_CST
4735 && tree_fits_shwi_p (ind
)
4736 && tree_to_shwi (ind
) == 0)
4737 csym
= TREE_OPERAND (csym
, 0);
4739 if (operand_equal_p (usym
, csym
, 0))
4742 /* Now do more complex comparison */
4743 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4744 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4745 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4749 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4750 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4752 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4753 aff_combination_add (&ubase_aff
, &cbase_aff
);
4754 expr
= aff_combination_to_tree (&ubase_aff
);
4755 return get_expr_id (data
, expr
);
4760 /* Determines the cost of the computation by that USE is expressed
4761 from induction variable CAND. If ADDRESS_P is true, we just need
4762 to create an address from it, otherwise we want to get it into
4763 register. A set of invariants we depend on is stored in
4764 DEPENDS_ON. AT is the statement at that the value is computed.
4765 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4766 addressing is likely. */
4769 get_computation_cost_at (struct ivopts_data
*data
,
4770 struct iv_use
*use
, struct iv_cand
*cand
,
4771 bool address_p
, bitmap
*depends_on
, gimple
*at
,
4775 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4777 tree utype
= TREE_TYPE (ubase
), ctype
;
4778 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4779 HOST_WIDE_INT ratio
, aratio
;
4780 bool var_present
, symbol_present
, stmt_is_after_inc
;
4783 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4784 machine_mode mem_mode
= (address_p
4785 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4791 /* Only consider real candidates. */
4793 return infinite_cost
;
4795 cbase
= cand
->iv
->base
;
4796 cstep
= cand
->iv
->step
;
4797 ctype
= TREE_TYPE (cbase
);
4799 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4801 /* We do not have a precision to express the values of use. */
4802 return infinite_cost
;
4806 || (use
->iv
->base_object
4807 && cand
->iv
->base_object
4808 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4809 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4811 /* Do not try to express address of an object with computation based
4812 on address of a different object. This may cause problems in rtl
4813 level alias analysis (that does not expect this to be happening,
4814 as this is illegal in C), and would be unlikely to be useful
4816 if (use
->iv
->base_object
4817 && cand
->iv
->base_object
4818 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4819 return infinite_cost
;
4822 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4824 /* TODO -- add direct handling of this case. */
4828 /* CSTEPI is removed from the offset in case statement is after the
4829 increment. If the step is not constant, we use zero instead.
4830 This is a bit imprecise (there is the extra addition), but
4831 redundancy elimination is likely to transform the code so that
4832 it uses value of the variable before increment anyway,
4833 so it is not that much unrealistic. */
4834 if (cst_and_fits_in_hwi (cstep
))
4835 cstepi
= int_cst_value (cstep
);
4839 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4840 return infinite_cost
;
4842 if (wi::fits_shwi_p (rat
))
4843 ratio
= rat
.to_shwi ();
4845 return infinite_cost
;
4848 ctype
= TREE_TYPE (cbase
);
4850 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4852 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4853 or ratio == 1, it is better to handle this like
4855 ubase - ratio * cbase + ratio * var
4857 (also holds in the case ratio == -1, TODO. */
4859 if (cst_and_fits_in_hwi (cbase
))
4861 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4862 cost
= difference_cost (data
,
4863 ubase
, build_int_cst (utype
, 0),
4864 &symbol_present
, &var_present
, &offset
,
4866 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4868 else if (ratio
== 1)
4870 tree real_cbase
= cbase
;
4872 /* Check to see if any adjustment is needed. */
4873 if (cstepi
== 0 && stmt_is_after_inc
)
4875 aff_tree real_cbase_aff
;
4878 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4880 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4882 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4883 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4886 cost
= difference_cost (data
,
4888 &symbol_present
, &var_present
, &offset
,
4890 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4893 && !POINTER_TYPE_P (ctype
)
4894 && multiplier_allowed_in_address_p
4896 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4898 if (cstepi
== 0 && stmt_is_after_inc
)
4900 if (POINTER_TYPE_P (ctype
))
4901 cbase
= fold_build2 (POINTER_PLUS_EXPR
, ctype
, cbase
, cstep
);
4903 cbase
= fold_build2 (PLUS_EXPR
, ctype
, cbase
, cstep
);
4906 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4907 cost
= difference_cost (data
,
4909 &symbol_present
, &var_present
, &offset
,
4911 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4915 cost
= force_var_cost (data
, cbase
, depends_on
);
4916 cost
= add_costs (cost
,
4917 difference_cost (data
,
4918 ubase
, build_int_cst (utype
, 0),
4919 &symbol_present
, &var_present
,
4920 &offset
, depends_on
));
4921 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4922 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4925 /* Set of invariants depended on by sub use has already been computed
4926 for the first use in the group. */
4930 if (depends_on
&& *depends_on
)
4931 bitmap_clear (*depends_on
);
4933 else if (inv_expr_id
)
4936 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4937 /* Clear depends on. */
4938 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4939 bitmap_clear (*depends_on
);
4942 /* If we are after the increment, the value of the candidate is higher by
4944 if (stmt_is_after_inc
)
4945 offset
-= ratio
* cstepi
;
4947 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4948 (symbol/var1/const parts may be omitted). If we are looking for an
4949 address, find the cost of addressing this. */
4951 return add_costs (cost
,
4952 get_address_cost (symbol_present
, var_present
,
4953 offset
, ratio
, cstepi
,
4955 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4956 speed
, stmt_is_after_inc
,
4959 /* Otherwise estimate the costs for computing the expression. */
4960 if (!symbol_present
&& !var_present
&& !offset
)
4963 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4967 /* Symbol + offset should be compile-time computable so consider that they
4968 are added once to the variable, if present. */
4969 if (var_present
&& (symbol_present
|| offset
))
4970 cost
.cost
+= adjust_setup_cost (data
,
4971 add_cost (speed
, TYPE_MODE (ctype
)));
4973 /* Having offset does not affect runtime cost in case it is added to
4974 symbol, but it increases complexity. */
4978 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4980 aratio
= ratio
> 0 ? ratio
: -ratio
;
4982 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4987 *can_autoinc
= false;
4990 /* Just get the expression, expand it and measure the cost. */
4991 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4994 return infinite_cost
;
4997 comp
= build_simple_mem_ref (comp
);
4999 return new_cost (computation_cost (comp
, speed
), 0);
5003 /* Determines the cost of the computation by that USE is expressed
5004 from induction variable CAND. If ADDRESS_P is true, we just need
5005 to create an address from it, otherwise we want to get it into
5006 register. A set of invariants we depend on is stored in
5007 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5008 autoinc addressing is likely. */
5011 get_computation_cost (struct ivopts_data
*data
,
5012 struct iv_use
*use
, struct iv_cand
*cand
,
5013 bool address_p
, bitmap
*depends_on
,
5014 bool *can_autoinc
, int *inv_expr_id
)
5016 return get_computation_cost_at (data
,
5017 use
, cand
, address_p
, depends_on
, use
->stmt
,
5018 can_autoinc
, inv_expr_id
);
5021 /* Determines cost of basing replacement of USE on CAND in a generic
5025 determine_use_iv_cost_generic (struct ivopts_data
*data
,
5026 struct iv_use
*use
, struct iv_cand
*cand
)
5030 int inv_expr_id
= -1;
5032 /* The simple case first -- if we need to express value of the preserved
5033 original biv, the cost is 0. This also prevents us from counting the
5034 cost of increment twice -- once at this use and once in the cost of
5036 if (cand
->pos
== IP_ORIGINAL
5037 && cand
->incremented_at
== use
->stmt
)
5039 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
5044 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
5045 NULL
, &inv_expr_id
);
5047 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
5050 return !infinite_cost_p (cost
);
5053 /* Determines cost of basing replacement of USE on CAND in an address. */
5056 determine_use_iv_cost_address (struct ivopts_data
*data
,
5057 struct iv_use
*use
, struct iv_cand
*cand
)
5061 int inv_expr_id
= -1;
5062 struct iv_use
*sub_use
;
5064 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5065 &can_autoinc
, &inv_expr_id
);
5067 if (cand
->ainc_use
== use
)
5070 cost
.cost
-= cand
->cost_step
;
5071 /* If we generated the candidate solely for exploiting autoincrement
5072 opportunities, and it turns out it can't be used, set the cost to
5073 infinity to make sure we ignore it. */
5074 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
5075 cost
= infinite_cost
;
5077 for (sub_use
= use
->next
;
5078 sub_use
&& !infinite_cost_p (cost
);
5079 sub_use
= sub_use
->next
)
5081 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, NULL
,
5082 &can_autoinc
, NULL
);
5083 cost
= add_costs (cost
, sub_cost
);
5086 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
5089 return !infinite_cost_p (cost
);
5092 /* Computes value of candidate CAND at position AT in iteration NITER, and
5093 stores it to VAL. */
5096 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
5099 aff_tree step
, delta
, nit
;
5100 struct iv
*iv
= cand
->iv
;
5101 tree type
= TREE_TYPE (iv
->base
);
5102 tree steptype
= type
;
5103 if (POINTER_TYPE_P (type
))
5104 steptype
= sizetype
;
5105 steptype
= unsigned_type_for (type
);
5107 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
5108 aff_combination_convert (&step
, steptype
);
5109 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
5110 aff_combination_convert (&nit
, steptype
);
5111 aff_combination_mult (&nit
, &step
, &delta
);
5112 if (stmt_after_increment (loop
, cand
, at
))
5113 aff_combination_add (&delta
, &step
);
5115 tree_to_aff_combination (iv
->base
, type
, val
);
5116 if (!POINTER_TYPE_P (type
))
5117 aff_combination_convert (val
, steptype
);
5118 aff_combination_add (val
, &delta
);
5121 /* Returns period of induction variable iv. */
5124 iv_period (struct iv
*iv
)
5126 tree step
= iv
->step
, period
, type
;
5129 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
5131 type
= unsigned_type_for (TREE_TYPE (step
));
5132 /* Period of the iv is lcm (step, type_range)/step -1,
5133 i.e., N*type_range/step - 1. Since type range is power
5134 of two, N == (step >> num_of_ending_zeros_binary (step),
5135 so the final result is
5137 (type_range >> num_of_ending_zeros_binary (step)) - 1
5140 pow2div
= num_ending_zeros (step
);
5142 period
= build_low_bits_mask (type
,
5143 (TYPE_PRECISION (type
)
5144 - tree_to_uhwi (pow2div
)));
5149 /* Returns the comparison operator used when eliminating the iv USE. */
5151 static enum tree_code
5152 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
5154 struct loop
*loop
= data
->current_loop
;
5158 ex_bb
= gimple_bb (use
->stmt
);
5159 exit
= EDGE_SUCC (ex_bb
, 0);
5160 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5161 exit
= EDGE_SUCC (ex_bb
, 1);
5163 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
5166 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5167 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5168 calculation is performed in non-wrapping type.
5170 TODO: More generally, we could test for the situation that
5171 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5172 This would require knowing the sign of OFFSET. */
5175 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
5177 enum tree_code code
;
5179 aff_tree aff_e1
, aff_e2
, aff_offset
;
5181 if (!nowrap_type_p (TREE_TYPE (base
)))
5184 base
= expand_simple_operations (base
);
5186 if (TREE_CODE (base
) == SSA_NAME
)
5188 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
5190 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
5193 code
= gimple_assign_rhs_code (stmt
);
5194 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5197 e1
= gimple_assign_rhs1 (stmt
);
5198 e2
= gimple_assign_rhs2 (stmt
);
5202 code
= TREE_CODE (base
);
5203 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5205 e1
= TREE_OPERAND (base
, 0);
5206 e2
= TREE_OPERAND (base
, 1);
5209 /* Use affine expansion as deeper inspection to prove the equality. */
5210 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
5211 &aff_e2
, &data
->name_expansion_cache
);
5212 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
5213 &aff_offset
, &data
->name_expansion_cache
);
5214 aff_combination_scale (&aff_offset
, -1);
5218 aff_combination_add (&aff_e2
, &aff_offset
);
5219 if (aff_combination_zero_p (&aff_e2
))
5222 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
5223 &aff_e1
, &data
->name_expansion_cache
);
5224 aff_combination_add (&aff_e1
, &aff_offset
);
5225 return aff_combination_zero_p (&aff_e1
);
5227 case POINTER_PLUS_EXPR
:
5228 aff_combination_add (&aff_e2
, &aff_offset
);
5229 return aff_combination_zero_p (&aff_e2
);
5236 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5237 comparison with CAND. NITER describes the number of iterations of
5238 the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 We aim to handle the following situation:
5256 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5257 We aim to optimize this to
5265 while (p < p_0 - a + b);
5267 This preserves the correctness, since the pointer arithmetics does not
5268 overflow. More precisely:
5270 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5271 overflow in computing it or the values of p.
5272 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5273 overflow. To prove this, we use the fact that p_0 = base + a. */
5276 iv_elimination_compare_lt (struct ivopts_data
*data
,
5277 struct iv_cand
*cand
, enum tree_code
*comp_p
,
5278 struct tree_niter_desc
*niter
)
5280 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
5281 struct aff_tree nit
, tmpa
, tmpb
;
5282 enum tree_code comp
;
5285 /* We need to know that the candidate induction variable does not overflow.
5286 While more complex analysis may be used to prove this, for now just
5287 check that the variable appears in the original program and that it
5288 is computed in a type that guarantees no overflows. */
5289 cand_type
= TREE_TYPE (cand
->iv
->base
);
5290 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
5293 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5294 the calculation of the BOUND could overflow, making the comparison
5296 if (!data
->loop_single_exit_p
)
5299 /* We need to be able to decide whether candidate is increasing or decreasing
5300 in order to choose the right comparison operator. */
5301 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5303 step
= int_cst_value (cand
->iv
->step
);
5305 /* Check that the number of iterations matches the expected pattern:
5306 a + 1 > b ? 0 : b - a - 1. */
5307 mbz
= niter
->may_be_zero
;
5308 if (TREE_CODE (mbz
) == GT_EXPR
)
5310 /* Handle a + 1 > b. */
5311 tree op0
= TREE_OPERAND (mbz
, 0);
5312 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5314 a
= TREE_OPERAND (op0
, 0);
5315 b
= TREE_OPERAND (mbz
, 1);
5320 else if (TREE_CODE (mbz
) == LT_EXPR
)
5322 tree op1
= TREE_OPERAND (mbz
, 1);
5324 /* Handle b < a + 1. */
5325 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5327 a
= TREE_OPERAND (op1
, 0);
5328 b
= TREE_OPERAND (mbz
, 0);
5336 /* Expected number of iterations is B - A - 1. Check that it matches
5337 the actual number, i.e., that B - A - NITER = 1. */
5338 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5339 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5340 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5341 aff_combination_scale (&nit
, -1);
5342 aff_combination_scale (&tmpa
, -1);
5343 aff_combination_add (&tmpb
, &tmpa
);
5344 aff_combination_add (&tmpb
, &nit
);
5345 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
5348 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5352 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5353 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5356 /* Determine the new comparison operator. */
5357 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5358 if (*comp_p
== NE_EXPR
)
5360 else if (*comp_p
== EQ_EXPR
)
5361 *comp_p
= invert_tree_comparison (comp
, false);
5368 /* Check whether it is possible to express the condition in USE by comparison
5369 of candidate CAND. If so, store the value compared with to BOUND, and the
5370 comparison operator to COMP. */
5373 may_eliminate_iv (struct ivopts_data
*data
,
5374 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5375 enum tree_code
*comp
)
5380 struct loop
*loop
= data
->current_loop
;
5382 struct tree_niter_desc
*desc
= NULL
;
5384 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5387 /* For now works only for exits that dominate the loop latch.
5388 TODO: extend to other conditions inside loop body. */
5389 ex_bb
= gimple_bb (use
->stmt
);
5390 if (use
->stmt
!= last_stmt (ex_bb
)
5391 || gimple_code (use
->stmt
) != GIMPLE_COND
5392 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5395 exit
= EDGE_SUCC (ex_bb
, 0);
5396 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5397 exit
= EDGE_SUCC (ex_bb
, 1);
5398 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5401 desc
= niter_for_exit (data
, exit
);
5405 /* Determine whether we can use the variable to test the exit condition.
5406 This is the case iff the period of the induction variable is greater
5407 than the number of iterations for which the exit condition is true. */
5408 period
= iv_period (cand
->iv
);
5410 /* If the number of iterations is constant, compare against it directly. */
5411 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5413 /* See cand_value_at. */
5414 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5416 if (!tree_int_cst_lt (desc
->niter
, period
))
5421 if (tree_int_cst_lt (period
, desc
->niter
))
5426 /* If not, and if this is the only possible exit of the loop, see whether
5427 we can get a conservative estimate on the number of iterations of the
5428 entire loop and compare against that instead. */
5431 widest_int period_value
, max_niter
;
5433 max_niter
= desc
->max
;
5434 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5436 period_value
= wi::to_widest (period
);
5437 if (wi::gtu_p (max_niter
, period_value
))
5439 /* See if we can take advantage of inferred loop bound information. */
5440 if (data
->loop_single_exit_p
)
5442 if (!max_loop_iterations (loop
, &max_niter
))
5444 /* The loop bound is already adjusted by adding 1. */
5445 if (wi::gtu_p (max_niter
, period_value
))
5453 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5455 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5456 aff_combination_to_tree (&bnd
));
5457 *comp
= iv_elimination_compare (data
, use
);
5459 /* It is unlikely that computing the number of iterations using division
5460 would be more profitable than keeping the original induction variable. */
5461 if (expression_expensive_p (*bound
))
5464 /* Sometimes, it is possible to handle the situation that the number of
5465 iterations may be zero unless additional assumtions by using <
5466 instead of != in the exit condition.
5468 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5469 base the exit condition on it. However, that is often too
5471 if (!integer_zerop (desc
->may_be_zero
))
5472 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5477 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5478 be copied, if it is used in the loop body and DATA->body_includes_call. */
5481 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5483 tree sbound
= bound
;
5484 STRIP_NOPS (sbound
);
5486 if (TREE_CODE (sbound
) == SSA_NAME
5487 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5488 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5489 && data
->body_includes_call
)
5490 return COSTS_N_INSNS (1);
5495 /* Determines cost of basing replacement of USE on CAND in a condition. */
5498 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5499 struct iv_use
*use
, struct iv_cand
*cand
)
5501 tree bound
= NULL_TREE
;
5503 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5504 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5506 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5507 tree
*control_var
, *bound_cst
;
5508 enum tree_code comp
= ERROR_MARK
;
5510 /* Only consider real candidates. */
5513 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5518 /* Try iv elimination. */
5519 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5521 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5522 if (elim_cost
.cost
== 0)
5523 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5524 else if (TREE_CODE (bound
) == INTEGER_CST
)
5526 /* If we replace a loop condition 'i < n' with 'p < base + n',
5527 depends_on_elim will have 'base' and 'n' set, which implies
5528 that both 'base' and 'n' will be live during the loop. More likely,
5529 'base + n' will be loop invariant, resulting in only one live value
5530 during the loop. So in that case we clear depends_on_elim and set
5531 elim_inv_expr_id instead. */
5532 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5534 elim_inv_expr_id
= get_expr_id (data
, bound
);
5535 bitmap_clear (depends_on_elim
);
5537 /* The bound is a loop invariant, so it will be only computed
5539 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5542 elim_cost
= infinite_cost
;
5544 /* Try expressing the original giv. If it is compared with an invariant,
5545 note that we cannot get rid of it. */
5546 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5550 /* When the condition is a comparison of the candidate IV against
5551 zero, prefer this IV.
5553 TODO: The constant that we're subtracting from the cost should
5554 be target-dependent. This information should be added to the
5555 target costs for each backend. */
5556 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5557 && integer_zerop (*bound_cst
)
5558 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5559 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5560 elim_cost
.cost
-= 1;
5562 express_cost
= get_computation_cost (data
, use
, cand
, false,
5563 &depends_on_express
, NULL
,
5564 &express_inv_expr_id
);
5565 fd_ivopts_data
= data
;
5566 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5568 /* Count the cost of the original bound as well. */
5569 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5570 if (bound_cost
.cost
== 0)
5571 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5572 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5573 bound_cost
.cost
= 0;
5574 express_cost
.cost
+= bound_cost
.cost
;
5576 /* Choose the better approach, preferring the eliminated IV. */
5577 if (compare_costs (elim_cost
, express_cost
) <= 0)
5580 depends_on
= depends_on_elim
;
5581 depends_on_elim
= NULL
;
5582 inv_expr_id
= elim_inv_expr_id
;
5586 cost
= express_cost
;
5587 depends_on
= depends_on_express
;
5588 depends_on_express
= NULL
;
5591 inv_expr_id
= express_inv_expr_id
;
5594 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5596 if (depends_on_elim
)
5597 BITMAP_FREE (depends_on_elim
);
5598 if (depends_on_express
)
5599 BITMAP_FREE (depends_on_express
);
5601 return !infinite_cost_p (cost
);
5604 /* Determines cost of basing replacement of USE on CAND. Returns false
5605 if USE cannot be based on CAND. */
5608 determine_use_iv_cost (struct ivopts_data
*data
,
5609 struct iv_use
*use
, struct iv_cand
*cand
)
5613 case USE_NONLINEAR_EXPR
:
5614 return determine_use_iv_cost_generic (data
, use
, cand
);
5617 return determine_use_iv_cost_address (data
, use
, cand
);
5620 return determine_use_iv_cost_condition (data
, use
, cand
);
5627 /* Return true if get_computation_cost indicates that autoincrement is
5628 a possibility for the pair of USE and CAND, false otherwise. */
5631 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5632 struct iv_cand
*cand
)
5638 if (use
->type
!= USE_ADDRESS
)
5641 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5642 &can_autoinc
, NULL
);
5644 BITMAP_FREE (depends_on
);
5646 return !infinite_cost_p (cost
) && can_autoinc
;
5649 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5650 use that allows autoincrement, and set their AINC_USE if possible. */
5653 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5657 for (i
= 0; i
< n_iv_cands (data
); i
++)
5659 struct iv_cand
*cand
= iv_cand (data
, i
);
5660 struct iv_use
*closest_before
= NULL
;
5661 struct iv_use
*closest_after
= NULL
;
5662 if (cand
->pos
!= IP_ORIGINAL
)
5665 for (j
= 0; j
< n_iv_uses (data
); j
++)
5667 struct iv_use
*use
= iv_use (data
, j
);
5668 unsigned uid
= gimple_uid (use
->stmt
);
5670 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5673 if (uid
< gimple_uid (cand
->incremented_at
)
5674 && (closest_before
== NULL
5675 || uid
> gimple_uid (closest_before
->stmt
)))
5676 closest_before
= use
;
5678 if (uid
> gimple_uid (cand
->incremented_at
)
5679 && (closest_after
== NULL
5680 || uid
< gimple_uid (closest_after
->stmt
)))
5681 closest_after
= use
;
5684 if (closest_before
!= NULL
5685 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5686 cand
->ainc_use
= closest_before
;
5687 else if (closest_after
!= NULL
5688 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5689 cand
->ainc_use
= closest_after
;
5693 /* Finds the candidates for the induction variables. */
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_uses (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
5713 /* Determines costs of basing the use of the iv on an iv candidate. */
5716 determine_use_iv_costs (struct ivopts_data
*data
)
5720 struct iv_cand
*cand
;
5721 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5723 alloc_use_cost_map (data
);
5725 for (i
= 0; i
< n_iv_uses (data
); i
++)
5727 use
= iv_use (data
, i
);
5729 if (data
->consider_all_candidates
)
5731 for (j
= 0; j
< n_iv_cands (data
); j
++)
5733 cand
= iv_cand (data
, j
);
5734 determine_use_iv_cost (data
, use
, cand
);
5741 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5743 cand
= iv_cand (data
, j
);
5744 if (!determine_use_iv_cost (data
, use
, cand
))
5745 bitmap_set_bit (to_clear
, j
);
5748 /* Remove the candidates for that the cost is infinite from
5749 the list of related candidates. */
5750 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5751 bitmap_clear (to_clear
);
5755 BITMAP_FREE (to_clear
);
5757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5759 fprintf (dump_file
, "Use-candidate costs:\n");
5761 for (i
= 0; i
< n_iv_uses (data
); i
++)
5763 use
= iv_use (data
, i
);
5765 fprintf (dump_file
, "Use %d:\n", i
);
5766 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5767 for (j
= 0; j
< use
->n_map_members
; j
++)
5769 if (!use
->cost_map
[j
].cand
5770 || infinite_cost_p (use
->cost_map
[j
].cost
))
5773 fprintf (dump_file
, " %d\t%d\t%d\t",
5774 use
->cost_map
[j
].cand
->id
,
5775 use
->cost_map
[j
].cost
.cost
,
5776 use
->cost_map
[j
].cost
.complexity
);
5777 if (use
->cost_map
[j
].depends_on
)
5778 bitmap_print (dump_file
,
5779 use
->cost_map
[j
].depends_on
, "","");
5780 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5781 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5782 fprintf (dump_file
, "\n");
5785 fprintf (dump_file
, "\n");
5787 fprintf (dump_file
, "\n");
5791 /* Determines cost of the candidate CAND. */
5794 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5796 comp_cost cost_base
;
5797 unsigned cost
, cost_step
;
5806 /* There are two costs associated with the candidate -- its increment
5807 and its initialization. The second is almost negligible for any loop
5808 that rolls enough, so we take it just very little into account. */
5810 base
= cand
->iv
->base
;
5811 cost_base
= force_var_cost (data
, base
, NULL
);
5812 /* It will be exceptional that the iv register happens to be initialized with
5813 the proper value at no cost. In general, there will at least be a regcopy
5815 if (cost_base
.cost
== 0)
5816 cost_base
.cost
= COSTS_N_INSNS (1);
5817 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5819 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5821 /* Prefer the original ivs unless we may gain something by replacing it.
5822 The reason is to make debugging simpler; so this is not relevant for
5823 artificial ivs created by other optimization passes. */
5824 if (cand
->pos
!= IP_ORIGINAL
5825 || !SSA_NAME_VAR (cand
->var_before
)
5826 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5829 /* Prefer not to insert statements into latch unless there are some
5830 already (so that we do not create unnecessary jumps). */
5831 if (cand
->pos
== IP_END
5832 && empty_block_p (ip_end_pos (data
->current_loop
)))
5836 cand
->cost_step
= cost_step
;
5839 /* Determines costs of computation of the candidates. */
5842 determine_iv_costs (struct ivopts_data
*data
)
5846 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5848 fprintf (dump_file
, "Candidate costs:\n");
5849 fprintf (dump_file
, " cand\tcost\n");
5852 for (i
= 0; i
< n_iv_cands (data
); i
++)
5854 struct iv_cand
*cand
= iv_cand (data
, i
);
5856 determine_iv_cost (data
, cand
);
5858 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5859 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5862 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5863 fprintf (dump_file
, "\n");
5866 /* Calculates cost for having SIZE induction variables. */
5869 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5871 /* We add size to the cost, so that we prefer eliminating ivs
5873 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5874 data
->body_includes_call
);
5877 /* For each size of the induction variable set determine the penalty. */
5880 determine_set_costs (struct ivopts_data
*data
)
5886 struct loop
*loop
= data
->current_loop
;
5889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5891 fprintf (dump_file
, "Global costs:\n");
5892 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5893 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5894 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5895 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5899 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5902 op
= PHI_RESULT (phi
);
5904 if (virtual_operand_p (op
))
5907 if (get_iv (data
, op
))
5913 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5915 struct version_info
*info
= ver_info (data
, j
);
5917 if (info
->inv_id
&& info
->has_nonlin_use
)
5921 data
->regs_used
= n
;
5922 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5923 fprintf (dump_file
, " regs_used %d\n", n
);
5925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5927 fprintf (dump_file
, " cost for size:\n");
5928 fprintf (dump_file
, " ivs\tcost\n");
5929 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5930 fprintf (dump_file
, " %d\t%d\n", j
,
5931 ivopts_global_cost_for_size (data
, j
));
5932 fprintf (dump_file
, "\n");
5936 /* Returns true if A is a cheaper cost pair than B. */
5939 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5949 cmp
= compare_costs (a
->cost
, b
->cost
);
5956 /* In case the costs are the same, prefer the cheaper candidate. */
5957 if (a
->cand
->cost
< b
->cand
->cost
)
5964 /* Returns candidate by that USE is expressed in IVS. */
5966 static struct cost_pair
*
5967 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5969 return ivs
->cand_for_use
[use
->id
];
5972 /* Computes the cost field of IVS structure. */
5975 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5977 comp_cost cost
= ivs
->cand_use_cost
;
5979 cost
.cost
+= ivs
->cand_cost
;
5981 cost
.cost
+= ivopts_global_cost_for_size (data
,
5982 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5987 /* Remove invariants in set INVS to set IVS. */
5990 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5998 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
6000 ivs
->n_invariant_uses
[iid
]--;
6001 if (ivs
->n_invariant_uses
[iid
] == 0)
6006 /* Set USE not to be expressed by any candidate in IVS. */
6009 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6012 unsigned uid
= use
->id
, cid
;
6013 struct cost_pair
*cp
;
6015 cp
= ivs
->cand_for_use
[uid
];
6021 ivs
->cand_for_use
[uid
] = NULL
;
6022 ivs
->n_cand_uses
[cid
]--;
6024 if (ivs
->n_cand_uses
[cid
] == 0)
6026 bitmap_clear_bit (ivs
->cands
, cid
);
6027 /* Do not count the pseudocandidates. */
6031 ivs
->cand_cost
-= cp
->cand
->cost
;
6033 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
6036 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
6038 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
6040 if (cp
->inv_expr_id
!= -1)
6042 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
6043 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
6044 ivs
->num_used_inv_expr
--;
6046 iv_ca_recount_cost (data
, ivs
);
6049 /* Add invariants in set INVS to set IVS. */
6052 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
6060 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
6062 ivs
->n_invariant_uses
[iid
]++;
6063 if (ivs
->n_invariant_uses
[iid
] == 1)
6068 /* Set cost pair for USE in set IVS to CP. */
6071 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6072 struct iv_use
*use
, struct cost_pair
*cp
)
6074 unsigned uid
= use
->id
, cid
;
6076 if (ivs
->cand_for_use
[uid
] == cp
)
6079 if (ivs
->cand_for_use
[uid
])
6080 iv_ca_set_no_cp (data
, ivs
, use
);
6087 ivs
->cand_for_use
[uid
] = cp
;
6088 ivs
->n_cand_uses
[cid
]++;
6089 if (ivs
->n_cand_uses
[cid
] == 1)
6091 bitmap_set_bit (ivs
->cands
, cid
);
6092 /* Do not count the pseudocandidates. */
6096 ivs
->cand_cost
+= cp
->cand
->cost
;
6098 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
6101 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
6102 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
6104 if (cp
->inv_expr_id
!= -1)
6106 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
6107 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
6108 ivs
->num_used_inv_expr
++;
6110 iv_ca_recount_cost (data
, ivs
);
6114 /* Extend set IVS by expressing USE by some of the candidates in it
6115 if possible. Consider all important candidates if candidates in
6116 set IVS don't give any result. */
6119 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6122 struct cost_pair
*best_cp
= NULL
, *cp
;
6125 struct iv_cand
*cand
;
6127 gcc_assert (ivs
->upto
>= use
->id
);
6131 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6133 cand
= iv_cand (data
, i
);
6134 cp
= get_use_iv_cost (data
, use
, cand
);
6135 if (cheaper_cost_pair (cp
, best_cp
))
6139 if (best_cp
== NULL
)
6141 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6143 cand
= iv_cand (data
, i
);
6144 cp
= get_use_iv_cost (data
, use
, cand
);
6145 if (cheaper_cost_pair (cp
, best_cp
))
6150 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
6153 /* Get cost for assignment IVS. */
6156 iv_ca_cost (struct iv_ca
*ivs
)
6158 /* This was a conditional expression but it triggered a bug in
6161 return infinite_cost
;
6166 /* Returns true if all dependences of CP are among invariants in IVS. */
6169 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
6174 if (!cp
->depends_on
)
6177 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
6179 if (ivs
->n_invariant_uses
[i
] == 0)
6186 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6187 it before NEXT_CHANGE. */
6189 static struct iv_ca_delta
*
6190 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
6191 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
6193 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
6196 change
->old_cp
= old_cp
;
6197 change
->new_cp
= new_cp
;
6198 change
->next_change
= next_change
;
6203 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6206 static struct iv_ca_delta
*
6207 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6209 struct iv_ca_delta
*last
;
6217 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
6219 last
->next_change
= l2
;
6224 /* Reverse the list of changes DELTA, forming the inverse to it. */
6226 static struct iv_ca_delta
*
6227 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6229 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6231 for (act
= delta
; act
; act
= next
)
6233 next
= act
->next_change
;
6234 act
->next_change
= prev
;
6237 std::swap (act
->old_cp
, act
->new_cp
);
6243 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6244 reverted instead. */
6247 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6248 struct iv_ca_delta
*delta
, bool forward
)
6250 struct cost_pair
*from
, *to
;
6251 struct iv_ca_delta
*act
;
6254 delta
= iv_ca_delta_reverse (delta
);
6256 for (act
= delta
; act
; act
= act
->next_change
)
6260 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
6261 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
6265 iv_ca_delta_reverse (delta
);
6268 /* Returns true if CAND is used in IVS. */
6271 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6273 return ivs
->n_cand_uses
[cand
->id
] > 0;
6276 /* Returns number of induction variable candidates in the set IVS. */
6279 iv_ca_n_cands (struct iv_ca
*ivs
)
6281 return ivs
->n_cands
;
6284 /* Free the list of changes DELTA. */
6287 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6289 struct iv_ca_delta
*act
, *next
;
6291 for (act
= *delta
; act
; act
= next
)
6293 next
= act
->next_change
;
6300 /* Allocates new iv candidates assignment. */
6302 static struct iv_ca
*
6303 iv_ca_new (struct ivopts_data
*data
)
6305 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6309 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
6310 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
6311 nw
->cands
= BITMAP_ALLOC (NULL
);
6314 nw
->cand_use_cost
= no_cost
;
6316 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
6318 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
6319 nw
->num_used_inv_expr
= 0;
6324 /* Free memory occupied by the set IVS. */
6327 iv_ca_free (struct iv_ca
**ivs
)
6329 free ((*ivs
)->cand_for_use
);
6330 free ((*ivs
)->n_cand_uses
);
6331 BITMAP_FREE ((*ivs
)->cands
);
6332 free ((*ivs
)->n_invariant_uses
);
6333 free ((*ivs
)->used_inv_expr
);
6338 /* Dumps IVS to FILE. */
6341 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6343 const char *pref
= " invariants ";
6345 comp_cost cost
= iv_ca_cost (ivs
);
6347 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
6348 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6349 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
6350 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6352 for (i
= 0; i
< ivs
->upto
; i
++)
6354 struct iv_use
*use
= iv_use (data
, i
);
6355 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6357 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6358 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6360 fprintf (file
, " use:%d --> ??\n", use
->id
);
6363 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6364 if (ivs
->n_invariant_uses
[i
])
6366 fprintf (file
, "%s%d", pref
, i
);
6369 fprintf (file
, "\n\n");
6372 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6373 new set, and store differences in DELTA. Number of induction variables
6374 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6375 the function will try to find a solution with mimimal iv candidates. */
6378 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6379 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6380 unsigned *n_ivs
, bool min_ncand
)
6385 struct cost_pair
*old_cp
, *new_cp
;
6388 for (i
= 0; i
< ivs
->upto
; i
++)
6390 use
= iv_use (data
, i
);
6391 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6394 && old_cp
->cand
== cand
)
6397 new_cp
= get_use_iv_cost (data
, use
, cand
);
6401 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6404 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6407 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6410 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6411 cost
= iv_ca_cost (ivs
);
6413 *n_ivs
= iv_ca_n_cands (ivs
);
6414 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6419 /* Try narrowing set IVS by removing CAND. Return the cost of
6420 the new set and store the differences in DELTA. START is
6421 the candidate with which we start narrowing. */
6424 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6425 struct iv_cand
*cand
, struct iv_cand
*start
,
6426 struct iv_ca_delta
**delta
)
6430 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6432 struct iv_cand
*cnd
;
6433 comp_cost cost
, best_cost
, acost
;
6436 for (i
= 0; i
< n_iv_uses (data
); i
++)
6438 use
= iv_use (data
, i
);
6440 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6441 if (old_cp
->cand
!= cand
)
6444 best_cost
= iv_ca_cost (ivs
);
6445 /* Start narrowing with START. */
6446 new_cp
= get_use_iv_cost (data
, use
, start
);
6448 if (data
->consider_all_candidates
)
6450 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6452 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6455 cnd
= iv_cand (data
, ci
);
6457 cp
= get_use_iv_cost (data
, use
, cnd
);
6461 iv_ca_set_cp (data
, ivs
, use
, cp
);
6462 acost
= iv_ca_cost (ivs
);
6464 if (compare_costs (acost
, best_cost
) < 0)
6473 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6475 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6478 cnd
= iv_cand (data
, ci
);
6480 cp
= get_use_iv_cost (data
, use
, cnd
);
6484 iv_ca_set_cp (data
, ivs
, use
, cp
);
6485 acost
= iv_ca_cost (ivs
);
6487 if (compare_costs (acost
, best_cost
) < 0)
6494 /* Restore to old cp for use. */
6495 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6499 iv_ca_delta_free (delta
);
6500 return infinite_cost
;
6503 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6506 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6507 cost
= iv_ca_cost (ivs
);
6508 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6513 /* Try optimizing the set of candidates IVS by removing candidates different
6514 from to EXCEPT_CAND from it. Return cost of the new set, and store
6515 differences in DELTA. */
6518 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6519 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6522 struct iv_ca_delta
*act_delta
, *best_delta
;
6524 comp_cost best_cost
, acost
;
6525 struct iv_cand
*cand
;
6528 best_cost
= iv_ca_cost (ivs
);
6530 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6532 cand
= iv_cand (data
, i
);
6534 if (cand
== except_cand
)
6537 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6539 if (compare_costs (acost
, best_cost
) < 0)
6542 iv_ca_delta_free (&best_delta
);
6543 best_delta
= act_delta
;
6546 iv_ca_delta_free (&act_delta
);
6555 /* Recurse to possibly remove other unnecessary ivs. */
6556 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6557 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6558 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6559 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6563 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6564 cheaper local cost for USE than BEST_CP. Return pointer to
6565 the corresponding cost_pair, otherwise just return BEST_CP. */
6567 static struct cost_pair
*
6568 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6569 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6570 struct cost_pair
*best_cp
)
6572 struct iv_cand
*cand
;
6573 struct cost_pair
*cp
;
6575 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6576 if (cand_idx
== old_cand
->id
)
6579 cand
= iv_cand (data
, cand_idx
);
6580 cp
= get_use_iv_cost (data
, use
, cand
);
6581 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6587 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6588 which are used by more than one iv uses. For each of those candidates,
6589 this function tries to represent iv uses under that candidate using
6590 other ones with lower local cost, then tries to prune the new set.
6591 If the new set has lower cost, It returns the new cost after recording
6592 candidate replacement in list DELTA. */
6595 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6596 struct iv_ca_delta
**delta
)
6598 bitmap_iterator bi
, bj
;
6599 unsigned int i
, j
, k
;
6601 struct iv_cand
*cand
;
6602 comp_cost orig_cost
, acost
;
6603 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6604 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6607 orig_cost
= iv_ca_cost (ivs
);
6609 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6611 if (ivs
->n_cand_uses
[i
] == 1
6612 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6615 cand
= iv_cand (data
, i
);
6618 /* Represent uses under current candidate using other ones with
6619 lower local cost. */
6620 for (j
= 0; j
< ivs
->upto
; j
++)
6622 use
= iv_use (data
, j
);
6623 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6625 if (old_cp
->cand
!= cand
)
6629 if (data
->consider_all_candidates
)
6630 for (k
= 0; k
< n_iv_cands (data
); k
++)
6631 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6632 old_cp
->cand
, best_cp
);
6634 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6635 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6636 old_cp
->cand
, best_cp
);
6638 if (best_cp
== old_cp
)
6641 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6643 /* No need for further prune. */
6647 /* Prune the new candidate set. */
6648 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6649 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6650 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6651 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6653 if (compare_costs (acost
, orig_cost
) < 0)
6659 iv_ca_delta_free (&act_delta
);
6665 /* Tries to extend the sets IVS in the best possible way in order
6666 to express the USE. If ORIGINALP is true, prefer candidates from
6667 the original set of IVs, otherwise favor important candidates not
6668 based on any memory object. */
6671 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6672 struct iv_use
*use
, bool originalp
)
6674 comp_cost best_cost
, act_cost
;
6677 struct iv_cand
*cand
;
6678 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6679 struct cost_pair
*cp
;
6681 iv_ca_add_use (data
, ivs
, use
);
6682 best_cost
= iv_ca_cost (ivs
);
6683 cp
= iv_ca_cand_for_use (ivs
, use
);
6686 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6687 iv_ca_set_no_cp (data
, ivs
, use
);
6690 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6691 first try important candidates not based on any memory object. Only if
6692 this fails, try the specific ones. Rationale -- in loops with many
6693 variables the best choice often is to use just one generic biv. If we
6694 added here many ivs specific to the uses, the optimization algorithm later
6695 would be likely to get stuck in a local minimum, thus causing us to create
6696 too many ivs. The approach from few ivs to more seems more likely to be
6697 successful -- starting from few ivs, replacing an expensive use by a
6698 specific iv should always be a win. */
6699 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, i
, bi
)
6701 cand
= iv_cand (data
, i
);
6703 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6706 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6709 if (iv_ca_cand_used_p (ivs
, cand
))
6712 cp
= get_use_iv_cost (data
, use
, cand
);
6716 iv_ca_set_cp (data
, ivs
, use
, cp
);
6717 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6719 iv_ca_set_no_cp (data
, ivs
, use
);
6720 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6722 if (compare_costs (act_cost
, best_cost
) < 0)
6724 best_cost
= act_cost
;
6726 iv_ca_delta_free (&best_delta
);
6727 best_delta
= act_delta
;
6730 iv_ca_delta_free (&act_delta
);
6733 if (infinite_cost_p (best_cost
))
6735 for (i
= 0; i
< use
->n_map_members
; i
++)
6737 cp
= use
->cost_map
+ i
;
6742 /* Already tried this. */
6743 if (cand
->important
)
6745 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6747 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6751 if (iv_ca_cand_used_p (ivs
, cand
))
6755 iv_ca_set_cp (data
, ivs
, use
, cp
);
6756 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6757 iv_ca_set_no_cp (data
, ivs
, use
);
6758 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6761 if (compare_costs (act_cost
, best_cost
) < 0)
6763 best_cost
= act_cost
;
6766 iv_ca_delta_free (&best_delta
);
6767 best_delta
= act_delta
;
6770 iv_ca_delta_free (&act_delta
);
6774 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6775 iv_ca_delta_free (&best_delta
);
6777 return !infinite_cost_p (best_cost
);
6780 /* Finds an initial assignment of candidates to uses. */
6782 static struct iv_ca
*
6783 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6785 struct iv_ca
*ivs
= iv_ca_new (data
);
6788 for (i
= 0; i
< n_iv_uses (data
); i
++)
6789 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6798 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6799 points to a bool variable, this function tries to break local
6800 optimal fixed-point by replacing candidates in IVS if it's true. */
6803 try_improve_iv_set (struct ivopts_data
*data
,
6804 struct iv_ca
*ivs
, bool *try_replace_p
)
6807 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6808 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6809 struct iv_cand
*cand
;
6811 /* Try extending the set of induction variables by one. */
6812 for (i
= 0; i
< n_iv_cands (data
); i
++)
6814 cand
= iv_cand (data
, i
);
6816 if (iv_ca_cand_used_p (ivs
, cand
))
6819 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6823 /* If we successfully added the candidate and the set is small enough,
6824 try optimizing it by removing other candidates. */
6825 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6827 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6828 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6829 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6830 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6833 if (compare_costs (acost
, best_cost
) < 0)
6836 iv_ca_delta_free (&best_delta
);
6837 best_delta
= act_delta
;
6840 iv_ca_delta_free (&act_delta
);
6845 /* Try removing the candidates from the set instead. */
6846 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6848 if (!best_delta
&& *try_replace_p
)
6850 *try_replace_p
= false;
6851 /* So far candidate selecting algorithm tends to choose fewer IVs
6852 so that it can handle cases in which loops have many variables
6853 but the best choice is often to use only one general biv. One
6854 weakness is it can't handle opposite cases, in which different
6855 candidates should be chosen with respect to each use. To solve
6856 the problem, we replace candidates in a manner described by the
6857 comments of iv_ca_replace, thus give general algorithm a chance
6858 to break local optimal fixed-point in these cases. */
6859 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6866 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6867 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6868 iv_ca_delta_free (&best_delta
);
6872 /* Attempts to find the optimal set of induction variables. We do simple
6873 greedy heuristic -- we try to replace at most one candidate in the selected
6874 solution and remove the unused ivs while this improves the cost. */
6876 static struct iv_ca
*
6877 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6880 bool try_replace_p
= true;
6882 /* Get the initial solution. */
6883 set
= get_initial_solution (data
, originalp
);
6886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6887 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6893 fprintf (dump_file
, "Initial set of candidates:\n");
6894 iv_ca_dump (data
, dump_file
, set
);
6897 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6901 fprintf (dump_file
, "Improved to:\n");
6902 iv_ca_dump (data
, dump_file
, set
);
6909 static struct iv_ca
*
6910 find_optimal_iv_set (struct ivopts_data
*data
)
6913 struct iv_ca
*set
, *origset
;
6915 comp_cost cost
, origcost
;
6917 /* Determine the cost based on a strategy that starts with original IVs,
6918 and try again using a strategy that prefers candidates not based
6920 origset
= find_optimal_iv_set_1 (data
, true);
6921 set
= find_optimal_iv_set_1 (data
, false);
6923 if (!origset
&& !set
)
6926 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6927 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6929 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6931 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6932 origcost
.cost
, origcost
.complexity
);
6933 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6934 cost
.cost
, cost
.complexity
);
6937 /* Choose the one with the best cost. */
6938 if (compare_costs (origcost
, cost
) <= 0)
6945 iv_ca_free (&origset
);
6947 for (i
= 0; i
< n_iv_uses (data
); i
++)
6949 use
= iv_use (data
, i
);
6950 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6956 /* Creates a new induction variable corresponding to CAND. */
6959 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6961 gimple_stmt_iterator incr_pos
;
6971 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6975 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6983 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6987 /* Mark that the iv is preserved. */
6988 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6989 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6991 /* Rewrite the increment so that it uses var_before directly. */
6992 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6996 gimple_add_tmp_var (cand
->var_before
);
6998 base
= unshare_expr (cand
->iv
->base
);
7000 create_iv (base
, unshare_expr (cand
->iv
->step
),
7001 cand
->var_before
, data
->current_loop
,
7002 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
7005 /* Creates new induction variables described in SET. */
7008 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
7011 struct iv_cand
*cand
;
7014 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
7016 cand
= iv_cand (data
, i
);
7017 create_new_iv (data
, cand
);
7020 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7022 fprintf (dump_file
, "Selected IV set for loop %d",
7023 data
->current_loop
->num
);
7024 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7025 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7026 LOCATION_LINE (data
->loop_loc
));
7027 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
7028 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
7030 cand
= iv_cand (data
, i
);
7031 dump_cand (dump_file
, cand
);
7033 fprintf (dump_file
, "\n");
7037 /* Rewrites USE (definition of iv used in a nonlinear expression)
7038 using candidate CAND. */
7041 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
7042 struct iv_use
*use
, struct iv_cand
*cand
)
7047 gimple_stmt_iterator bsi
;
7049 /* An important special case -- if we are asked to express value of
7050 the original iv by itself, just exit; there is no need to
7051 introduce a new computation (that might also need casting the
7052 variable to unsigned and back). */
7053 if (cand
->pos
== IP_ORIGINAL
7054 && cand
->incremented_at
== use
->stmt
)
7056 enum tree_code stmt_code
;
7058 gcc_assert (is_gimple_assign (use
->stmt
));
7059 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
7061 /* Check whether we may leave the computation unchanged.
7062 This is the case only if it does not rely on other
7063 computations in the loop -- otherwise, the computation
7064 we rely upon may be removed in remove_unused_ivs,
7065 thus leading to ICE. */
7066 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
7067 if (stmt_code
== PLUS_EXPR
7068 || stmt_code
== MINUS_EXPR
7069 || stmt_code
== POINTER_PLUS_EXPR
)
7071 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
7072 op
= gimple_assign_rhs2 (use
->stmt
);
7073 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
7074 op
= gimple_assign_rhs1 (use
->stmt
);
7081 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
7085 comp
= get_computation (data
->current_loop
, use
, cand
);
7086 gcc_assert (comp
!= NULL_TREE
);
7088 switch (gimple_code (use
->stmt
))
7091 tgt
= PHI_RESULT (use
->stmt
);
7093 /* If we should keep the biv, do not replace it. */
7094 if (name_info (data
, tgt
)->preserve_biv
)
7097 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
7101 tgt
= gimple_assign_lhs (use
->stmt
);
7102 bsi
= gsi_for_stmt (use
->stmt
);
7109 if (!valid_gimple_rhs_p (comp
)
7110 || (gimple_code (use
->stmt
) != GIMPLE_PHI
7111 /* We can't allow re-allocating the stmt as it might be pointed
7113 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
7114 >= gimple_num_ops (gsi_stmt (bsi
)))))
7116 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
7117 true, GSI_SAME_STMT
);
7118 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
7120 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
7121 /* As this isn't a plain copy we have to reset alignment
7123 if (SSA_NAME_PTR_INFO (comp
))
7124 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
7128 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
7130 ass
= gimple_build_assign (tgt
, comp
);
7131 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
7133 bsi
= gsi_for_stmt (use
->stmt
);
7134 remove_phi_node (&bsi
, false);
7138 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
7139 use
->stmt
= gsi_stmt (bsi
);
7143 /* Performs a peephole optimization to reorder the iv update statement with
7144 a mem ref to enable instruction combining in later phases. The mem ref uses
7145 the iv value before the update, so the reordering transformation requires
7146 adjustment of the offset. CAND is the selected IV_CAND.
7150 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7158 directly propagating t over to (1) will introduce overlapping live range
7159 thus increase register pressure. This peephole transform it into:
7163 t = MEM_REF (base, iv2, 8, 8);
7170 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7173 gimple
*iv_update
, *stmt
;
7175 gimple_stmt_iterator gsi
, gsi_iv
;
7177 if (cand
->pos
!= IP_NORMAL
)
7180 var_after
= cand
->var_after
;
7181 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7183 bb
= gimple_bb (iv_update
);
7184 gsi
= gsi_last_nondebug_bb (bb
);
7185 stmt
= gsi_stmt (gsi
);
7187 /* Only handle conditional statement for now. */
7188 if (gimple_code (stmt
) != GIMPLE_COND
)
7191 gsi_prev_nondebug (&gsi
);
7192 stmt
= gsi_stmt (gsi
);
7193 if (stmt
!= iv_update
)
7196 gsi_prev_nondebug (&gsi
);
7197 if (gsi_end_p (gsi
))
7200 stmt
= gsi_stmt (gsi
);
7201 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7204 if (stmt
!= use
->stmt
)
7207 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7210 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7212 fprintf (dump_file
, "Reordering \n");
7213 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
7214 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
7215 fprintf (dump_file
, "\n");
7218 gsi
= gsi_for_stmt (use
->stmt
);
7219 gsi_iv
= gsi_for_stmt (iv_update
);
7220 gsi_move_before (&gsi_iv
, &gsi
);
7222 cand
->pos
= IP_BEFORE_USE
;
7223 cand
->incremented_at
= use
->stmt
;
7226 /* Rewrites USE (address that is an iv) using candidate CAND. */
7229 rewrite_use_address_1 (struct ivopts_data
*data
,
7230 struct iv_use
*use
, struct iv_cand
*cand
)
7233 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7234 tree base_hint
= NULL_TREE
;
7238 adjust_iv_update_pos (cand
, use
);
7239 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
7241 unshare_aff_combination (&aff
);
7243 /* To avoid undefined overflow problems, all IV candidates use unsigned
7244 integer types. The drawback is that this makes it impossible for
7245 create_mem_ref to distinguish an IV that is based on a memory object
7246 from one that represents simply an offset.
7248 To work around this problem, we pass a hint to create_mem_ref that
7249 indicates which variable (if any) in aff is an IV based on a memory
7250 object. Note that we only consider the candidate. If this is not
7251 based on an object, the base of the reference is in some subexpression
7252 of the use -- but these will use pointer types, so they are recognized
7253 by the create_mem_ref heuristics anyway. */
7254 if (cand
->iv
->base_object
)
7255 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7257 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7258 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
7259 reference_alias_ptr_type (*use
->op_p
),
7260 iv
, base_hint
, data
->speed
);
7261 copy_ref_info (ref
, *use
->op_p
);
7265 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7266 first use of a group, rewrites sub uses in the group too. */
7269 rewrite_use_address (struct ivopts_data
*data
,
7270 struct iv_use
*use
, struct iv_cand
*cand
)
7272 struct iv_use
*next
;
7274 gcc_assert (use
->sub_id
== 0);
7275 rewrite_use_address_1 (data
, use
, cand
);
7276 update_stmt (use
->stmt
);
7278 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
7280 rewrite_use_address_1 (data
, next
, cand
);
7281 update_stmt (next
->stmt
);
7287 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7291 rewrite_use_compare (struct ivopts_data
*data
,
7292 struct iv_use
*use
, struct iv_cand
*cand
)
7294 tree comp
, *var_p
, op
, bound
;
7295 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7296 enum tree_code compare
;
7297 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
7303 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7304 tree var_type
= TREE_TYPE (var
);
7307 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7309 fprintf (dump_file
, "Replacing exit test: ");
7310 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7313 bound
= unshare_expr (fold_convert (var_type
, bound
));
7314 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7316 gsi_insert_seq_on_edge_immediate (
7317 loop_preheader_edge (data
->current_loop
),
7320 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7321 gimple_cond_set_lhs (cond_stmt
, var
);
7322 gimple_cond_set_code (cond_stmt
, compare
);
7323 gimple_cond_set_rhs (cond_stmt
, op
);
7327 /* The induction variable elimination failed; just express the original
7329 comp
= get_computation (data
->current_loop
, use
, cand
);
7330 gcc_assert (comp
!= NULL_TREE
);
7332 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7335 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7336 true, GSI_SAME_STMT
);
7339 /* Rewrites USE using candidate CAND. */
7342 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
7346 case USE_NONLINEAR_EXPR
:
7347 rewrite_use_nonlinear_expr (data
, use
, cand
);
7351 rewrite_use_address (data
, use
, cand
);
7355 rewrite_use_compare (data
, use
, cand
);
7362 update_stmt (use
->stmt
);
7365 /* Rewrite the uses using the selected induction variables. */
7368 rewrite_uses (struct ivopts_data
*data
)
7371 struct iv_cand
*cand
;
7374 for (i
= 0; i
< n_iv_uses (data
); i
++)
7376 use
= iv_use (data
, i
);
7377 cand
= use
->selected
;
7380 rewrite_use (data
, use
, cand
);
7384 /* Removes the ivs that are not used after rewriting. */
7387 remove_unused_ivs (struct ivopts_data
*data
)
7391 bitmap toremove
= BITMAP_ALLOC (NULL
);
7393 /* Figure out an order in which to release SSA DEFs so that we don't
7394 release something that we'd have to propagate into a debug stmt
7396 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7398 struct version_info
*info
;
7400 info
= ver_info (data
, j
);
7402 && !integer_zerop (info
->iv
->step
)
7404 && !info
->iv
->have_use_for
7405 && !info
->preserve_biv
)
7407 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7409 tree def
= info
->iv
->ssa_name
;
7411 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7413 imm_use_iterator imm_iter
;
7414 use_operand_p use_p
;
7418 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7420 if (!gimple_debug_bind_p (stmt
))
7423 /* We just want to determine whether to do nothing
7424 (count == 0), to substitute the computed
7425 expression into a single use of the SSA DEF by
7426 itself (count == 1), or to use a debug temp
7427 because the SSA DEF is used multiple times or as
7428 part of a larger expression (count > 1). */
7430 if (gimple_debug_bind_get_value (stmt
) != def
)
7434 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7440 struct iv_use dummy_use
;
7441 struct iv_cand
*best_cand
= NULL
, *cand
;
7442 unsigned i
, best_pref
= 0, cand_pref
;
7444 memset (&dummy_use
, 0, sizeof (dummy_use
));
7445 dummy_use
.iv
= info
->iv
;
7446 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7448 cand
= iv_use (data
, i
)->selected
;
7449 if (cand
== best_cand
)
7451 cand_pref
= operand_equal_p (cand
->iv
->step
,
7455 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7456 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7459 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7461 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7464 best_pref
= cand_pref
;
7471 tree comp
= get_computation_at (data
->current_loop
,
7472 &dummy_use
, best_cand
,
7473 SSA_NAME_DEF_STMT (def
));
7479 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7480 DECL_ARTIFICIAL (vexpr
) = 1;
7481 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7482 if (SSA_NAME_VAR (def
))
7483 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7485 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7487 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7488 gimple_stmt_iterator gsi
;
7490 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7491 gsi
= gsi_after_labels (gimple_bb
7492 (SSA_NAME_DEF_STMT (def
)));
7494 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7496 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7500 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7502 if (!gimple_debug_bind_p (stmt
))
7505 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7506 SET_USE (use_p
, comp
);
7514 release_defs_bitset (toremove
);
7516 BITMAP_FREE (toremove
);
7519 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7520 for hash_map::traverse. */
7523 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7529 /* Frees data allocated by the optimization of a single loop. */
7532 free_loop_data (struct ivopts_data
*data
)
7540 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7541 delete data
->niters
;
7542 data
->niters
= NULL
;
7545 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7547 struct version_info
*info
;
7549 info
= ver_info (data
, i
);
7551 info
->has_nonlin_use
= false;
7552 info
->preserve_biv
= false;
7555 bitmap_clear (data
->relevant
);
7556 bitmap_clear (data
->important_candidates
);
7558 for (i
= 0; i
< n_iv_uses (data
); i
++)
7560 struct iv_use
*use
= iv_use (data
, i
);
7561 struct iv_use
*pre
= use
, *sub
= use
->next
;
7565 gcc_assert (sub
->related_cands
== NULL
);
7566 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7573 BITMAP_FREE (use
->related_cands
);
7574 for (j
= 0; j
< use
->n_map_members
; j
++)
7575 if (use
->cost_map
[j
].depends_on
)
7576 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7577 free (use
->cost_map
);
7580 data
->iv_uses
.truncate (0);
7582 for (i
= 0; i
< n_iv_cands (data
); i
++)
7584 struct iv_cand
*cand
= iv_cand (data
, i
);
7586 if (cand
->depends_on
)
7587 BITMAP_FREE (cand
->depends_on
);
7590 data
->iv_candidates
.truncate (0);
7592 if (data
->version_info_size
< num_ssa_names
)
7594 data
->version_info_size
= 2 * num_ssa_names
;
7595 free (data
->version_info
);
7596 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7599 data
->max_inv_id
= 0;
7601 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7602 SET_DECL_RTL (obj
, NULL_RTX
);
7604 decl_rtl_to_reset
.truncate (0);
7606 data
->inv_expr_tab
->empty ();
7607 data
->inv_expr_id
= 0;
7609 data
->iv_common_cand_tab
->empty ();
7610 data
->iv_common_cands
.truncate (0);
7613 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7617 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7619 free_loop_data (data
);
7620 free (data
->version_info
);
7621 BITMAP_FREE (data
->relevant
);
7622 BITMAP_FREE (data
->important_candidates
);
7624 decl_rtl_to_reset
.release ();
7625 data
->iv_uses
.release ();
7626 data
->iv_candidates
.release ();
7627 delete data
->inv_expr_tab
;
7628 data
->inv_expr_tab
= NULL
;
7629 free_affine_expand_cache (&data
->name_expansion_cache
);
7630 delete data
->iv_common_cand_tab
;
7631 data
->iv_common_cand_tab
= NULL
;
7632 data
->iv_common_cands
.release ();
7633 obstack_free (&data
->iv_obstack
, NULL
);
7636 /* Returns true if the loop body BODY includes any function calls. */
7639 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7641 gimple_stmt_iterator gsi
;
7644 for (i
= 0; i
< num_nodes
; i
++)
7645 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7647 gimple
*stmt
= gsi_stmt (gsi
);
7648 if (is_gimple_call (stmt
)
7649 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7655 /* Optimizes the LOOP. Returns true if anything changed. */
7658 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7660 bool changed
= false;
7661 struct iv_ca
*iv_ca
;
7662 edge exit
= single_dom_exit (loop
);
7665 gcc_assert (!data
->niters
);
7666 data
->current_loop
= loop
;
7667 data
->loop_loc
= find_loop_location (loop
);
7668 data
->speed
= optimize_loop_for_speed_p (loop
);
7670 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7672 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7673 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7674 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7675 LOCATION_LINE (data
->loop_loc
));
7676 fprintf (dump_file
, "\n");
7680 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7681 exit
->src
->index
, exit
->dest
->index
);
7682 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7683 fprintf (dump_file
, "\n");
7686 fprintf (dump_file
, "\n");
7689 body
= get_loop_body (loop
);
7690 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7691 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7694 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7696 /* For each ssa name determines whether it behaves as an induction variable
7698 if (!find_induction_variables (data
))
7701 /* Finds interesting uses (item 1). */
7702 find_interesting_uses (data
);
7703 group_address_uses (data
);
7704 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7707 /* Finds candidates for the induction variables (item 2). */
7708 find_iv_candidates (data
);
7710 /* Calculates the costs (item 3, part 1). */
7711 determine_iv_costs (data
);
7712 determine_use_iv_costs (data
);
7713 determine_set_costs (data
);
7715 /* Find the optimal set of induction variables (item 3, part 2). */
7716 iv_ca
= find_optimal_iv_set (data
);
7721 /* Create the new induction variables (item 4, part 1). */
7722 create_new_ivs (data
, iv_ca
);
7723 iv_ca_free (&iv_ca
);
7725 /* Rewrite the uses (item 4, part 2). */
7726 rewrite_uses (data
);
7728 /* Remove the ivs that are unused after rewriting. */
7729 remove_unused_ivs (data
);
7731 /* We have changed the structure of induction variables; it might happen
7732 that definitions in the scev database refer to some of them that were
7737 free_loop_data (data
);
7742 /* Main entry point. Optimizes induction variables in loops. */
7745 tree_ssa_iv_optimize (void)
7748 struct ivopts_data data
;
7750 tree_ssa_iv_optimize_init (&data
);
7752 /* Optimize the loops starting with the innermost ones. */
7753 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7756 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7758 tree_ssa_iv_optimize_loop (&data
, loop
);
7761 tree_ssa_iv_optimize_finalize (&data
);