/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned
	 that reflects the costs associated with incrementing the value of
	 the variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
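/* For illustration (this example is not part of the original comment):
   given a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may end up producing the equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   selecting the pointer p as the only induction variable: the address
   a[i] is strength-reduced to *p, and the exit test on i is rewritten
   against the pointer bound, so the counter i is eliminated.  */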
#include "coretypes.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
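/* As an illustrative mapping of these kinds onto source constructs
   (not part of the original sources): in a loop body containing

     a[i] = i * i;
     if (i < n) ...

   the reference a[i] gives rise to a USE_ADDRESS, the computation
   i * i to a USE_NONLINEAR_EXPR, and the test i < n to a USE_COMPARE.  */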
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
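/* A sketch of how such costs are meant to be ordered (the comparison
   helper itself lies outside this excerpt): the runtime cost field is
   compared first, and complexity only breaks ties, e.g.

     {4, 1} is cheaper than {5, 0}	-- lower runtime cost wins
     {4, 1} is cheaper than {4, 2}	-- equal cost, lower complexity wins  */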
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;
static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}
/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
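/* For example (illustrative, not part of the original sources): for a
   pointer iv with base &a[0] + 4 * i over int a[100], the recursion peels
   the POINTER_PLUS_EXPR and the ADDR_EXPR down to the declaration a,
   yielding the base object (void *) &a; for an integral iv such as the
   counter i itself, NULL_TREE is returned, since it points to nothing.  */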
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
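/* For instance (illustrative): if the loop uses an SSA name n_5 defined
   before the loop, the first get_iv query records it lazily as the
   degenerate iv { base = n_5, step = 0 }, i.e. as a loop invariant, so
   callers can treat invariants and real ivs uniformly.  */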
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    true))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
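/* Worked examples (illustrative, not part of the original sources):
   constant_multiple_of (n * 4, n, &m) sets m = 4 -- the MULT_EXPR case
   recurses on the first operand, where operand_equal_p (n, n) yields 1,
   which is then scaled by the constant 4.  By contrast,
   constant_multiple_of (n + 1, n, &m) fails: the recursion reaches the
   INTEGER_CST case with the non-constant BOT n, which returns false.  */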
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
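/* For instance (illustrative): on a strict-alignment target, a reference
   to a 32-bit int whose address advances by a step of 2 bytes each
   iteration may become misaligned -- tree_ctz (step) == 1, and
   (1U << 1) * BITS_PER_UNIT == 16 is below the required 32-bit
   alignment, so the function returns true.  */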
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
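/* For example (illustrative, not part of the original sources): for
   EXPR = p + 16 with pointer p, the POINTER_PLUS_EXPR case stores 16 to
   *OFFSET and returns p; for EXPR = &a[4] with 4-byte array elements,
   the ADDR_EXPR/ARRAY_REF path stores 16 and returns &a.  */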
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
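/* E.g. (illustrative): for int this returns unsigned int, for unsigned
   long the type is returned unchanged, and for a pointer type we get an
   unsigned integer type of the same precision -- so arithmetic on
   candidates never happens in a type with undefined overflow.  */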
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
         it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
        {
          cand = add_candidate_1 (data,
                                  iv->base, iv->step, true, IP_ORIGINAL, NULL,
                                  SSA_NAME_DEF_STMT (def));
          cand->var_before = iv->ssa_name;
          cand->var_after = def;
        }
      else
        gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = bitmap_count_bits (use->related_cands);

          /* Round up to the nearest power of two, so that computing
             the modulo by it is fast.  */
          size = s ? (1 << ceil_log2 (s)) : 1;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
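
/* Illustrative note (not part of the pass logic): because N_MAP_MEMBERS is
   rounded up to a power of two above, reducing a candidate id modulo the
   table size needs only a mask instead of a division.  E.g., assuming a
   hypothetical table of 8 entries:

     slot = cand_id & (8 - 1);    equivalent to cand_id % 8

   This is why set_use_iv_cost and get_use_iv_cost below index the table
   with "& (n_map_members - 1)" rather than "%".  */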
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON, that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
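
/* Illustrative sketch of the probing above (hypothetical ids, not from the
   source): with n_map_members == 4, candidates with ids 1 and 5 both hash
   to slot 1 (5 & 3 == 1).  The second insertion starts at slot 1, finds it
   occupied, and takes slot 2; a lookup for id 5 in get_use_iv_cost below
   retraces the same order and may stop early at a NULL slot.  */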
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
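
/* Worked example (illustrative numbers only): for a use with base 4 and
   step 8 and a candidate with base 0 and step 4, constant_multiple_of
   yields ratio == 2, so the affine form built above is

     use = 4 - 2 * 0 + 2 * var  =  2 * var + 4

   where VAR holds the candidate's value at the statement.  */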
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
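
/* For instance (illustrative numbers): a setup cost of 20 in a loop with
   avg_loop_niter == 10 is accounted as 20 / 10 == 2 per iteration when
   optimizing for speed, while for size the full constant 20 is kept.  */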
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
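
/* As an illustration (target-dependent, stated only as an assumption): on
   i386-like targets the "base + index * scale" addressing accepts scales
   1, 2, 4 and 8, so the bitmap built above would have exactly those
   positive ratios set, and multiplier_allowed_in_address_p (3, ...) would
   return false there.  */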
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
          /* For some targets, like ARM THUMB1, the offset should be naturally
             aligned.  Try an aligned offset if address_mode is not QImode.  */
          off = (address_mode == QImode)
            ? 0
            : ((unsigned HOST_WIDE_INT) 1 << i)
                - GET_MODE_SIZE (address_mode);
          if (off > 0)
            {
              XEXP (addr, 1) = gen_int_mode (off, address_mode);
              if (memory_address_addr_space_p (mem_mode, addr, as))
                break;
            }
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_predec[mem_mode])
            data->ainc_costs[AINC_PRE_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postdec[mem_mode])
            data->ainc_costs[AINC_POST_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_preinc[mem_mode])
            data->ainc_costs[AINC_PRE_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postinc[mem_mode])
            data->ainc_costs[AINC_POST_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
          && msize == cstep)
        autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
        autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
        autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep)
        autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
        autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
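
/* Example (illustrative only): for an address of shape "sym + 4 + 2 * i"
   the lookup above is data->costs[1][0][1][1] with complexity 3 (symbol,
   offset and ratio each contribute one), while a plain register address
   uses data->costs[0][0][0][0] with complexity 0.  */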
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (equal_p
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
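
/* Illustrative example (not from the source): for EXPR "a + b * 8",
   MULT is "b * 8", so m == exact_log2 (8) == 3 and the cost queried is
   shiftadd_cost (speed, mode, 3), i.e. the cost of a shift-left-by-3
   combined with an addition, instead of a full multiply plus add.  */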
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost [speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since type range is a power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
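
/* Worked example (illustrative): for a 32-bit unsigned iv with step 12,
   num_ending_zeros (12) == 2, so the period is
   (1 << (32 - 2)) - 1 == 0x3fffffff: for that many increments the iv is
   guaranteed not to revisit a previously tested value.  */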
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);

  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));

    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

   Also, we only look for the first addition in the computation of BASE.
   More complex analysis would be better, but introducing it just for
   this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes it is possible to handle the situation that the number of
     iterations may be zero (unless additional assumptions hold) by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
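/* A qualitative reading of the period check above (illustrative, not a
   statement about any particular target): for an unsigned 8-bit candidate
   with step 1, the candidate's value returns to its starting point after
   wrapping around, so an exit condition that can remain false for more
   iterations than the candidate's period cannot be expressed as a single
   comparison of the candidate against one bound -- the bound would be
   crossed more than once.  */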
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
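/* Illustrative rationale (hypothetical SSA names): for a bound n_1(D), the
   default definition of a PARM_DECL n, the incoming register holding n may
   be clobbered by calls, so when the loop body contains a call the parameter
   first has to be copied somewhere call-safe; the COSTS_N_INSNS (1) above is
   a rough account of that copy.  */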
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);

	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}
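/* A hypothetical illustration of why the closest neighbouring use matters:
   on a target with post-increment addressing, a use

     ... = *p;

   immediately followed by the original increment

     p = p + 4;

   can later be combined into a single post-increment access (in C terms,
   "*p++" with 4-byte elements), which is what recording AINC_USE above
   makes possible.  */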
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}
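/* An illustrative reading (the exact numbers are target-dependent and come
   from estimate_reg_pressure_cost): while the loop's live values fit in the
   available registers, each extra induction variable only adds its own unit
   of cost; once the total pressure exceeds the register file, additional
   induction variables also pay for the spills they are likely to cause, so
   the cost grows much faster past that point.  */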
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Remove the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Add the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
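/* The typical usage pattern of the delta machinery (see iv_ca_extend and
   iv_ca_narrow below): record tentative changes, commit them to measure the
   resulting cost, then revert, leaving IVS untouched:

     *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
     iv_ca_delta_commit (data, ivs, *delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, *delta, false);  */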
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "  use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "  use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
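/* A hypothetical example of the special case above: if the use is the
   original increment of the biv,

     i_7 = i_4 + 1;

   and CAND is that original iv, the statement is left alone (provided the
   other operand is loop invariant), since rewriting it through
   get_computation would only add a useless cast to unsigned and back.  */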
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
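/* A hypothetical before/after for the elimination path above: an exit test

     if (i_4 < n_6(D))

   may be replaced by a comparison of the selected candidate against the
   bound computed once on the preheader edge, e.g.

     _t = base_2 + n_6(D);   // inserted before the loop
     ...
     if (p_8 < _t)

   so that the original counter i becomes dead inside the loop.  */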
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

static bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse <void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
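/* An illustrative (hypothetical) end-to-end effect of this pass, assuming a
   target where pointer increments are cheap:

     before:                            after:
     for (i = 0; i < n; i++)            for (p = a, end = a + n; p != end; p++)
       a[i] = 0;                          *p = 0;

   The index i is strength-reduced to the pointer induction variable p, and
   the exit test is rewritten to compare p against a bound computed once
   before the loop.  Whether this exact shape is produced depends on the
   target cost model queried above.  */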