/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
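/* As an illustrative sketch (not part of the original sources): for a loop
   like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address of the store a + 4 * i and the comparison i < n are the
   interesting uses; candidates include the original counter i and a pointer
   p = &a[0] stepping by 4.  If the pointer candidate wins the cost
   comparison, the loop is rewritten to increment p and compare it against
   the loop-invariant bound &a[n], eliminating i entirely.  */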
#include "coretypes.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
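/* Sketch (not from the original sources): with profile feedback indicating
   the loop body runs roughly 100 times per entry, the estimate above is
   returned directly; without any estimate, the constant AVG_LOOP_NITER
   fallback of 5 is used, which deliberately biases the cost model toward
   short loops.  */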
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
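/* For example (a sketch): a use that needs one extra addition on top of a
   candidate's value might be assigned {cost = add cost, complexity = 1},
   while a use the candidate cannot express at all gets infinite_cost, so it
   can never be selected for that use.  */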
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;
static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
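/* For instance (a sketch): determine_base_object ((char *) &a[i] + 4)
   recurses through the conversion and the POINTER_PLUS_EXPR down to
   &a[i] and returns the address of the underlying declaration a, while
   an integer constant cast to a pointer yields NULL_TREE, i.e. no known
   base object.  */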
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  cand->var_before = iv->ssa_name;
	  cand->var_after = def;
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = bitmap_count_bits (use->related_cands);

	  /* Round up to a power of two, so that taking the modulo by it
	     is fast.  */
	  size = s ? (1 << ceil_log2 (s)) : 1;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
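
/* For instance, a use with 5 related candidates gets a cost map of
   1 << ceil_log2 (5) == 8 members; the few unused slots are the price paid
   for replacing a division by the cheap masking done in set_use_iv_cost
   and get_use_iv_cost below.  */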

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
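
/* Illustration of the probing scheme above: with n_map_members == 8, a
   candidate with id 13 hashes to slot 13 & 7 == 5; if slots 5..7 are
   already occupied by other candidates, the linear search wraps around
   and continues from slot 0.  */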

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
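
/* For example, for A = (unsigned int) x and B = (unsigned int) y with x and
   y of type long, *A and *B are replaced by x and y and long is returned,
   so get_computation_aff can do the subtraction in the wider type before
   truncating the result.  */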

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
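
/* A small example of the rewrite above: for a use with base 0 and step 4
   (say a byte offset i * 4) expressed from a candidate with base 0 and
   step 1, we get ratio == 4 and AFF becomes 0 - 4 * 0 + 4 * var, i.e.
   simply 4 * var.  */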

/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx_insn *seq;
      rtx addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((unsigned HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1 ? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	  /* For some targets, like ARM THUMB1, the offset should be naturally
	     aligned.  Try an aligned offset if address_mode is not QImode.  */
	  off = (address_mode == QImode)
		? 0
		: ((unsigned HOST_WIDE_INT) 1 << i)
		    - GET_MODE_SIZE (address_mode);
	  if (off > 0)
	    {
	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
	      if (memory_address_addr_space_p (mem_mode, addr, as))
		break;
	    }
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_predec[mem_mode])
	    data->ainc_costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postdec[mem_mode])
	    data->ainc_costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_preinc[mem_mode])
	    data->ainc_costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postinc[mem_mode])
	    data->ainc_costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
	  && msize == cstep)
	autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep)
	autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
	autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
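
/* The four index bits used above encode the shape of the address; e.g.
   data->costs[0][1][1][0] holds the cost of a "var + cst" address, while
   data->costs[1][1][1][1] holds the cost of the full
   "sym + var + cst + rat * index" form.  */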

/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (equal_p
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
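
/* For instance, for EXPR a + b * 4 with MULT b * 4 we get m == 2, and the
   returned cost is that of a single shift-add computing (b << 2) + a
   instead of a separate multiplication and addition.  */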

/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost [speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
				       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
					     mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
					     mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
	return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
	return false;
    }
  return true;
}

/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
			   ? TYPE_MODE (TREE_TYPE (*use->op_p))
			   : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, mem_mode,
		 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
	get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					mem_mode,
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
			       NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
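
/* In other words, VAL is built as base + (NITER + 1) * step when AT follows
   the increment of the candidate, and base + NITER * step otherwise.  */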

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step -1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}
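
/* For example, for a 32-bit unsigned iv with step 4, num_ending_zeros
   yields 2 and the period is (1 << 30) - 1: for that many increments the
   variable is guaranteed not to have wrapped back to its initial value.  */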

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
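
/* E.g. if the true edge of the GIMPLE_COND leaves the loop, the eliminated
   condition becomes var == bound (exit as soon as the candidate reaches
   BOUND); if the false edge leaves it, the loop keeps running while
   var != bound.  */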

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
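
/* In the iv_elimination_compare_lt scenario below, for instance, BASE is
   p_0 = base + a and OFFSET is a, so the POINTER_PLUS operand e2 cancels
   against OFFSET and we can conclude that p_0 - a cannot overflow.  */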
4533 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4534 comparison with CAND. NITER describes the number of iterations of
4535 the loops. If successful, the comparison in COMP_P is altered accordingly.
4537 We aim to handle the following situation:
4553 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4554 We aim to optimize this to
4562 while (p < p_0 - a + b);
4564 This preserves the correctness, since the pointer arithmetics does not
4565 overflow. More precisely:
4567 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4568 overflow in computing it or the values of p.
4569 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4570 overflow. To prove this, we use the fact that p_0 = base + a. */
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
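/* Illustration of the operator choice above (hypothetical candidates): a
   candidate with step 1 is increasing, so an NE_EXPR exit test becomes
   LT_EXPR ("iv < bound"); a candidate with step -4 is decreasing and gets
   GT_EXPR.  An EQ_EXPR exit test receives the inverted operator, e.g.
   GE_EXPR for an increasing candidate.  */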
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
        {
          /* See if we can take advantage of inferred loop bound information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (wi::gtu_p (max_niter, period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
                         aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
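/* Example of the period check above (hypothetical numbers): an 8-bit
   unsigned candidate with step 1 has period 255, i.e. it takes pairwise
   distinct values in 255 consecutive iterations.  An exit test that is
   true for at most 100 iterations can safely be based on it; with a
   conservative bound of 300 iterations the candidate could wrap around
   and satisfy the rewritten comparison twice, so the elimination is
   rejected.  */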
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
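/* A hypothetical example of the elim/express choice made above: for

     for (i = 0; i < n; i++)
       a[i] = 0;

   with a pointer candidate p, the elimination path rewrites the exit test
   to something like "p < a + n" and lets i die, while the expression path
   keeps the test on i and re-expresses i from p, which needs a division
   like (p - a) / 4.  Comparing elim_cost and express_cost normally picks
   the eliminated form.  */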
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
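/* Illustration (hypothetical target with post-increment addressing): for
   an IP_ORIGINAL candidate whose increment directly follows the access

     t = *p;
     p = p + 4;

   setting AINC_USE to the load lets the cost model treat the pair as a
   single auto-increment access (*p++) instead of a separate load and
   add.  */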
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
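/* A hypothetical reading of this cost (exact numbers depend on the
   target's estimate_reg_pressure_cost): with regs_used = 3 and plenty of
   free registers, the cost grows roughly linearly in SIZE, whereas once
   SIZE + regs_used exceeds the available registers the estimated spill
   cost starts to dominate, which is what penalizes large candidate
   sets.  */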
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= use->id);
  ivs->upto++;
  ivs->bad_uses++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      cp = get_use_iv_cost (data, use, cand);
      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
        {
          cand = iv_cand (data, i);
          cp = get_use_iv_cost (data, use, cand);
          if (cheaper_cost_pair (cp, best_cp))
            best_cp = cp;
        }
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
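/* Illustration of the delta mechanism (hypothetical cost pairs): a list

     { use 2: cp_a -> cp_b } -> { use 0: NULL -> cp_c }

   committed with FORWARD = true assigns cp_b to use 2 and cp_c to use 0;
   committing the same list with FORWARD = false walks the reversed list
   with old/new swapped, restoring cp_a and NULL.  This lets the search
   code below evaluate a change and then cheaply undo it.  */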
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
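/* A sketch of one improvement round (hypothetical costs): starting from
   {cand 1} with cost 14, extending by cand 3 may yield {cand 1, cand 3}
   with cost 12, after which the pruning step may discover that cand 1
   became useless, leaving {cand 3} with cost 10.  find_optimal_iv_set_1
   repeats such rounds until no extension or pruning lowers the cost.  */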
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
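/* Illustration of the two paths above (hypothetical GIMPLE): when iv
   elimination succeeded, an exit test such as

     if (i_1 != 100)

   is replaced by a test on the selected candidate, e.g.
   "if (p_2 < &a + 400)", with the bound computed in the loop preheader.
   Otherwise only the compared operand is re-expressed in terms of the
   candidate and the comparison itself is kept.  */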
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
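/* Illustration of the debug handling above (hypothetical): if a removed
   iv i_1 occurs in two debug binds, say "# DEBUG i => i_1" and
   "# DEBUG j => i_1 * 2", a debug temp D#1 bound to the equivalent
   computation on the selected candidate is inserted once at the point of
   i_1's definition, and both binds are rewritten to use D#1, so debug
   info stays correct without keeping the dead iv alive.  */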
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}