/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variables value to
	 be usable for the given purpose (adding base and offset for arrays,
	 etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

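/* For illustration (an informal sketch, not part of the pass itself):
   given a source loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address of a[i] is conceptually &a + i * sizeof (*a), which costs
   a multiplication and an addition on every iteration.  Strength
   reduction instead chooses an induction variable that steps by
   sizeof (*a), as if the source had been

     for (p = &a[0]; p < &a[100]; p++)
       *p = 0;

   so each iteration performs a single pointer increment; if the counter
   i then has no remaining uses, it can be eliminated entirely.  */
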
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

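/* For instance (informally), in a loop body containing

     a[i] = i;		-- the address &a[i] is a USE_ADDRESS, while the
			   stored value of i is a USE_NONLINEAR_EXPR
     if (i < n) ...	-- the exit test is a USE_COMPARE  */
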
/* Cost of a computation.  */

typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

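/* Illustratively (the concrete numbers are target-dependent): a use that a
   candidate covers directly might cost {0, 0}; one requiring, say, a
   base + offset address computation might cost {4, 1}; and a use that no
   candidate in the set can express at all is charged infinite_cost.  */
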
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

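/* Schematically (an informal sketch of the non-IP_ORIGINAL positions):

     loop:
       ...		<- IP_BEFORE_USE / IP_AFTER_USE bracket one use
       if (cond) exit;	<- IP_NORMAL increments just before this test
     latch:
       ...		<- IP_END increments at the end of the latch block
       goto loop;  */
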
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

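/* For instance (informally): for EXPR = &a[i] the returned base object
   is &a; for EXPR = p + 4, where p is a pointer, it is p itself; and
   for a non-pointer EXPR no base object exists, so NULL is returned.  */
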
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}

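/* For instance (informally): with TOP = x * 4 + x * 8 and BOT = x this
   returns true with *MUL = 12, and with TOP = 12, BOT = 4 it returns
   true with *MUL = 3; for TOP = x + 1, BOT = x no constant multiple
   exists and false is returned.  */
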
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

2013 find_interesting_uses (struct ivopts_data
*data
)
2016 gimple_stmt_iterator bsi
;
2017 basic_block
*body
= get_loop_body (data
->current_loop
);
2019 struct version_info
*info
;
2022 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2023 fprintf (dump_file
, "Uses:\n\n");
2025 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2030 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2031 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2032 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2033 find_interesting_uses_outside (data
, e
);
2035 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2036 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2037 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2038 if (!is_gimple_debug (gsi_stmt (bsi
)))
2039 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2042 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2046 fprintf (dump_file
, "\n");
2048 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2050 info
= ver_info (data
, i
);
2053 fprintf (dump_file
, " ");
2054 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2055 fprintf (dump_file
, " is invariant (%d)%s\n",
2056 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2060 fprintf (dump_file
, "\n");
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}

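/* For instance (informally), for an array a of 4-byte ints,
   strip_offset (&a[3], &off) yields the core &a[0] with off = 12, so a
   single candidate based on &a[0] can serve uses at several different
   constant offsets.  */
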
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}

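/* The point (informally) is that on targets with auto-modify addressing,
   e.g. a post-increment load such as ARM's "ldr r0, [r1], #4", an
   IP_AFTER_USE candidate lets the address computation and the iv
   increment be carried out by the memory access itself, making the
   separate increment statement free.  */
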
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
         it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
        {
          cand = add_candidate_1 (data,
                                  iv->base, iv->step, true, IP_ORIGINAL, NULL,
                                  SSA_NAME_DEF_STMT (def));
          cand->var_before = iv->ssa_name;
          cand->var_after = def;
        }
      else
        gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = bitmap_count_bits (use->related_cands);

          /* Round up to the nearest power of two, so that taking a
             modulo by it can be done with a mask.  */
          size = s ? (1 << ceil_log2 (s)) : 1;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
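
/* Illustrative sketch (not part of the pass): rounding the map size up
   to a power of two lets later lookups reduce a candidate id modulo the
   size with a single mask instead of a division:

     size = 1 << ceil_log2 (5);      -- 8 slots for 5 candidates
     slot = cand_id & (size - 1);    -- equals cand_id % size

   set_use_iv_cost and get_use_iv_cost below depend on this invariant.  */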
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}
/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
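
/* Worked example of the ordering these helpers implement: the runtime
   cost dominates and complexity only breaks ties, so compare_costs
   ranks {cost 4, complexity 9} below {5, 0}, and {4, 1} below {4, 2}.
   Note that add_costs performs plain addition with no saturation at
   INFTY, so callers check infinite_cost_p themselves where needed.  */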
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
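
/* For example, on an LP64 target with A = (unsigned int) al and
   B = (unsigned int) bl, where al and bl have type long, the function
   replaces A and B by al and bl and returns long; get_computation_aff
   below can then fold the difference al - bl in the wider type and
   only cast the final result.  */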
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
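
/* Worked example of the decomposition above: for a use with base ub
   and step 4 based on a candidate with base cb and step 2,
   constant_multiple_of yields rat == 2 and the affine result encodes

     use = ub - 2 * cb + 2 * var

   where var is the candidate's var_before, or var_after with one extra
   step folded into the cb term when AT is past the increment.  */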
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
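
/* For example, if the loop is optimized for speed and avg_loop_niter
   estimates 10 iterations, a setup cost of 20 contributes only 2 here,
   whereas when optimizing for size the full 20 is charged, since the
   setup instructions occupy space regardless of the trip count.  */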
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
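
/* Illustrative sketch: the probe above asks the backend whether RTL of
   the shape

     (plus:P (mult:P (reg) (const_int RATIO)) (reg))

   is a legitimate address for MODE in AS.  On x86, for instance, ratios
   1, 2, 4 and 8 are accepted, so base + 4 * index needs no separate
   multiplication instruction.  */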
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
          /* For some targets, like ARM THUMB1, the offset should be naturally
             aligned.  Try an aligned offset if address_mode is not QImode.  */
          off = (address_mode == QImode)
                ? 0
                : ((unsigned HOST_WIDE_INT) 1 << i)
                    - GET_MODE_SIZE (address_mode);
          if (off > 0)
            {
              XEXP (addr, 1) = gen_int_mode (off, address_mode);
              if (memory_address_addr_space_p (mem_mode, addr, as))
                break;
            }
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_predec[mem_mode])
            data->ainc_costs[AINC_PRE_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postdec[mem_mode])
            data->ainc_costs[AINC_POST_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_preinc[mem_mode])
            data->ainc_costs[AINC_PRE_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postinc[mem_mode])
            data->ainc_costs[AINC_POST_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
          && msize == cstep)
        autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
        autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
        autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep)
        autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
        autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (equal_p
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
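
/* For example, for expr = a + b * 8 the multiplier gives m == 3, so the
   returned cost is based on shiftadd_cost -- one shift-and-add pattern
   (e.g. an x86 LEA or an ARM add with LSL) -- rather than on a separate
   multiply plus add; for a MINUS_EXPR one of the shiftsub costs is used
   instead, depending on which operand is the shifted one.  */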
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
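
/* For example, with ratio == 1, ubase == &a[0] and cbase == (T *) &a,
   the ARRAY_REF stripping above makes the two symbols compare equal, so
   -1 is returned: the difference folds to a constant and needs no
   compiler-generated temporary.  */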
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
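
/* Worked example: for a candidate with base 16 and step 4, the value
   computed above at iteration NITER is 16 + 4 * NITER, or
   16 + 4 * (NITER + 1) when AT is past the increment; the arithmetic is
   carried out in an unsigned type so that wrapping is well defined.  */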
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
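
/* Worked example: for a 32-bit unsigned iv with step 4,
   num_ending_zeros gives 2 and the period is (1 << 30) - 1: the iv
   takes 2^30 distinct values before revisiting its starting value
   modulo 2^32.  */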
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
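
/* For example, if the exit is taken on the true edge of the
   GIMPLE_COND, the replacement condition must exit under the same
   circumstances, so an EQ_EXPR comparison against the bound is used;
   otherwise the loop keeps running while the iv differs from the bound
   and NE_EXPR is returned.  */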
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
                                  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
                                  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
        return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
                                      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
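
/* For example, for base p_0 defined as p_0 = base0 + a in a
   non-wrapping pointer type and offset a, the affine expansion above
   cancels exactly, proving that p_0 - a cannot overflow; this is
   precisely the fact that iv_elimination_compare_lt below relies on.  */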
4539 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4540 comparison with CAND. NITER describes the number of iterations of
4541 the loops. If successful, the comparison in COMP_P is altered accordingly.
4543 We aim to handle the following situation:
4559 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4560 We aim to optimize this to
4568 while (p < p_0 - a + b);
4570 This preserves the correctness, since the pointer arithmetics does not
4571 overflow. More precisely:
4573 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4574 overflow in computing it or the values of p.
4575 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4576 overflow. To prove this, we use the fact that p_0 = base + a. */
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

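/* Worked example of the pattern check above (hypothetical values): for
   a = 2 and b = 7, MAY_BE_ZERO is (2 + 1 > 7), which is false, and
   NITER = b - a - 1 = 4.  The affine computation above then forms
   b - a - niter = 7 - 2 - 4 = 1, i.e. tmpb reduces to the constant 1
   with no remaining elements (tmpb.n == 0, tmpb.offset == 1), so the
   LT_EXPR rewrite is allowed.  */
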
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

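/* For illustration, a sketch of the transformation this check enables
   (hypothetical loop; the candidate p is assumed to step by 4):

     for (i = 0; i < n; i++)          for (p = base; p != base + 4*n; p += 4)
       *p++ = 0;               -->      *p = 0;

   cand_value_at computes the value of the candidate after the final
   iteration (BOUND = base + 4*n above), and the exit test on i is
   replaced by a test of p against that bound, so that i can be
   eliminated entirely.  */
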
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);

	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}

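/* For illustration (hypothetical numbers): with regs_used == 2, a set of
   3 ivs costs 3 plus whatever estimate_reg_pressure_cost charges for the
   extra registers, while a set of 4 ivs costs at least one unit more.
   The "+ size" term is what makes the search prefer the smaller set
   whenever the register pressure estimates come out equal.  */
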
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

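/* For illustration: a delta list [u1: A->B] -> [u2: C->D] reversed by
   iv_ca_delta_reverse becomes [u2: D->C] -> [u1: B->A], i.e. both the
   order of the changes and the direction of each change are flipped,
   which is exactly the undo log needed by iv_ca_delta_commit below when
   FORWARD is false.  */
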
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

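/* Note the trial pattern used here and by the callers below: build a
   DELTA, commit it, measure iv_ca_cost, then commit it again with
   FORWARD false to roll it back.  The caller is left with an unchanged
   set IVS plus the measured cost, and may re-commit DELTA later only if
   it turned out to be an improvement.  */
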
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

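/* For illustration (hypothetical loop): for an array walk such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   the first pass over DATA->important_candidates typically expresses all
   three address uses with one generic biv (e.g. i itself, scaled at each
   use), and only if every important candidate yields infinite cost does
   the second loop fall back to use-specific candidates such as pointers
   into a, b and c.  */
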
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

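/* For illustration: one call of try_improve_iv_set performs a single
   greedy step.  Starting from, say, {c1, c2}, it evaluates {c1, c2, c3},
   {c1, c2, c4}, ... (each optionally re-pruned), keeps the cheapest
   strict improvement, and returns true; returning false means neither
   adding nor pruning a candidate lowered iv_ca_cost, so the caller's
   while loop stops at a local minimum.  */
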
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

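/* For illustration: the two strategies above may legitimately disagree.
   A loop whose original biv already matches the exit test tends to win
   with ORIGINALP = true, while an address-heavy loop often gets a
   cheaper set from the ORIGINALP = false strategy; compare_costs then
   picks whichever assignment came out cheaper overall.  */
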
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset

   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

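/* For illustration (hypothetical GIMPLE): if a removed iv i_3 appears in

     # DEBUG i => i_3
     # DEBUG j => i_3 + 1

   the first bind counts 1 (the value is i_3 itself) and the second
   pushes the count above 1, so a debug temp bound to the computed
   replacement expression is created and both binds are rewritten to use
   it, keeping debug info alive without the iv.  */
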
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

static bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}