1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
69 #include "stor-layout.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "pointer-set.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
78 #include "gimple-expr.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop
*loop
)
132 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
134 return AVG_LOOP_NITER (loop
);
139 /* Representation of the induction variable. */
142 tree base
; /* Initial value of the iv. */
143 tree base_object
; /* A memory object to that the induction variable points. */
144 tree step
; /* Step of the iv (constant only). */
145 tree ssa_name
; /* The ssa name with the value. */
146 bool biv_p
; /* Is it a biv? */
147 bool have_use_for
; /* Do we already have a use for it? */
148 unsigned use_id
; /* The identifier in the use if it is the case. */
151 /* Per-ssa version information (induction variable descriptions, etc.). */
154 tree name
; /* The ssa name. */
155 struct iv
*iv
; /* Induction variable description. */
156 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
158 bool preserve_biv
; /* For the original biv, whether to preserve it. */
159 unsigned inv_id
; /* Id of an invariant. */
165 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
166 USE_ADDRESS
, /* Use in an address. */
167 USE_COMPARE
/* Use is a compare. */
170 /* Cost of a computation. */
173 int cost
; /* The runtime cost. */
174 unsigned complexity
; /* The estimate of the complexity of the code for
175 the computation (in no concrete units --
176 complexity field should be larger for more
177 complex expressions and addressing modes). */
180 static const comp_cost no_cost
= {0, 0};
181 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
183 /* The candidate - cost pair. */
186 struct iv_cand
*cand
; /* The candidate. */
187 comp_cost cost
; /* The cost. */
188 bitmap depends_on
; /* The list of invariants that have to be
190 tree value
; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
193 enum tree_code comp
; /* For iv elimination, the comparison. */
194 int inv_expr_id
; /* Loop invariant expression id. */
200 unsigned id
; /* The id of the use. */
201 enum use_type type
; /* Type of the use. */
202 struct iv
*iv
; /* The induction variable it is based on. */
203 gimple stmt
; /* Statement in that it occurs. */
204 tree
*op_p
; /* The place where it occurs. */
205 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
208 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
209 struct cost_pair
*cost_map
;
210 /* The costs wrto the iv candidates. */
212 struct iv_cand
*selected
;
213 /* The selected candidate. */
216 /* The position where the iv is computed. */
219 IP_NORMAL
, /* At the end, just before the exit condition. */
220 IP_END
, /* At the end of the latch block. */
221 IP_BEFORE_USE
, /* Immediately before a specific use. */
222 IP_AFTER_USE
, /* Immediately after a specific use. */
223 IP_ORIGINAL
/* The original biv. */
226 /* The induction variable candidate. */
229 unsigned id
; /* The number of the candidate. */
230 bool important
; /* Whether this is an "important" candidate, i.e. such
231 that it should be considered by all uses. */
232 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
233 gimple incremented_at
;/* For original biv, the statement where it is
235 tree var_before
; /* The variable used for it before increment. */
236 tree var_after
; /* The variable used for it after increment. */
237 struct iv
*iv
; /* The value of the candidate. NULL for
238 "pseudocandidate" used to indicate the possibility
239 to replace the final value of an iv by direct
240 computation of the value. */
241 unsigned cost
; /* Cost of the candidate. */
242 unsigned cost_step
; /* Cost of the candidate's increment operation. */
243 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
244 where it is incremented. */
245 bitmap depends_on
; /* The list of invariants that are used in step of the
249 /* Loop invariant expression hashtable entry. */
250 struct iv_inv_expr_ent
257 /* The data used by the induction variable optimizations. */
259 typedef struct iv_use
*iv_use_p
;
261 typedef struct iv_cand
*iv_cand_p
;
263 /* Hashtable helpers. */
265 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
267 typedef iv_inv_expr_ent value_type
;
268 typedef iv_inv_expr_ent compare_type
;
269 static inline hashval_t
hash (const value_type
*);
270 static inline bool equal (const value_type
*, const compare_type
*);
273 /* Hash function for loop invariant expressions. */
276 iv_inv_expr_hasher::hash (const value_type
*expr
)
281 /* Hash table equality function for expressions. */
284 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
286 return expr1
->hash
== expr2
->hash
287 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
292 /* The currently optimized loop. */
293 struct loop
*current_loop
;
295 /* Numbers of iterations for all exits of the current loop. */
296 struct pointer_map_t
*niters
;
298 /* Number of registers used in it. */
301 /* The size of version_info array allocated. */
302 unsigned version_info_size
;
304 /* The array of information for the ssa names. */
305 struct version_info
*version_info
;
307 /* The hashtable of loop invariant expressions created
309 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
311 /* Loop invariant expression id. */
314 /* The bitmap of indices in version_info whose value was changed. */
317 /* The uses of induction variables. */
318 vec
<iv_use_p
> iv_uses
;
320 /* The candidates. */
321 vec
<iv_cand_p
> iv_candidates
;
323 /* A bitmap of important candidates. */
324 bitmap important_candidates
;
326 /* The maximum invariant id. */
329 /* Whether to consider just related and important candidates when replacing a
331 bool consider_all_candidates
;
333 /* Are we optimizing for speed? */
336 /* Whether the loop body includes any function calls. */
337 bool body_includes_call
;
339 /* Whether the loop body can only be exited via single exit. */
340 bool loop_single_exit_p
;
343 /* An assignment of iv candidates to uses. */
347 /* The number of uses covered by the assignment. */
350 /* Number of uses that cannot be expressed by the candidates in the set. */
353 /* Candidate assigned to a use, together with the related costs. */
354 struct cost_pair
**cand_for_use
;
356 /* Number of times each candidate is used. */
357 unsigned *n_cand_uses
;
359 /* The candidates used. */
362 /* The number of candidates in the set. */
365 /* Total number of registers needed. */
368 /* Total cost of expressing uses. */
369 comp_cost cand_use_cost
;
371 /* Total cost of candidates. */
374 /* Number of times each invariant is used. */
375 unsigned *n_invariant_uses
;
377 /* The array holding the number of uses of each loop
378 invariant expressions created by ivopt. */
379 unsigned *used_inv_expr
;
381 /* The number of created loop invariants. */
382 unsigned num_used_inv_expr
;
384 /* Total cost of the assignment. */
388 /* Difference of two iv candidate assignments. */
395 /* An old assignment (for rollback purposes). */
396 struct cost_pair
*old_cp
;
398 /* A new assignment. */
399 struct cost_pair
*new_cp
;
401 /* Next change in the list. */
402 struct iv_ca_delta
*next_change
;
405 /* Bound on number of candidates below that all candidates are considered. */
407 #define CONSIDER_ALL_CANDIDATES_BOUND \
408 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
410 /* If there are more iv occurrences, we just give up (it is quite unlikely that
411 optimizing such a loop would help, and it would take ages). */
413 #define MAX_CONSIDERED_USES \
414 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
416 /* If there are at most this number of ivs in the set, try removing unnecessary
417 ivs from the set always. */
419 #define ALWAYS_PRUNE_CAND_SET_BOUND \
420 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
422 /* The list of trees for that the decl_rtl field must be reset is stored
425 static vec
<tree
> decl_rtl_to_reset
;
427 static comp_cost
force_expr_to_var_cost (tree
, bool);
429 /* Number of uses recorded in DATA. */
431 static inline unsigned
432 n_iv_uses (struct ivopts_data
*data
)
434 return data
->iv_uses
.length ();
437 /* Ith use recorded in DATA. */
439 static inline struct iv_use
*
440 iv_use (struct ivopts_data
*data
, unsigned i
)
442 return data
->iv_uses
[i
];
445 /* Number of candidates recorded in DATA. */
447 static inline unsigned
448 n_iv_cands (struct ivopts_data
*data
)
450 return data
->iv_candidates
.length ();
453 /* Ith candidate recorded in DATA. */
455 static inline struct iv_cand
*
456 iv_cand (struct ivopts_data
*data
, unsigned i
)
458 return data
->iv_candidates
[i
];
461 /* The single loop exit if it dominates the latch, NULL otherwise. */
464 single_dom_exit (struct loop
*loop
)
466 edge exit
= single_exit (loop
);
471 if (!just_once_each_iteration_p (loop
, exit
->src
))
477 /* Dumps information about the induction variable IV to FILE. */
480 dump_iv (FILE *file
, struct iv
*iv
)
484 fprintf (file
, "ssa name ");
485 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
486 fprintf (file
, "\n");
489 fprintf (file
, " type ");
490 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
491 fprintf (file
, "\n");
495 fprintf (file
, " base ");
496 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
497 fprintf (file
, "\n");
499 fprintf (file
, " step ");
500 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
501 fprintf (file
, "\n");
505 fprintf (file
, " invariant ");
506 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
507 fprintf (file
, "\n");
512 fprintf (file
, " base object ");
513 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
514 fprintf (file
, "\n");
518 fprintf (file
, " is a biv\n");
521 /* Dumps information about the USE to FILE. */
524 dump_use (FILE *file
, struct iv_use
*use
)
526 fprintf (file
, "use %d\n", use
->id
);
530 case USE_NONLINEAR_EXPR
:
531 fprintf (file
, " generic\n");
535 fprintf (file
, " address\n");
539 fprintf (file
, " compare\n");
546 fprintf (file
, " in statement ");
547 print_gimple_stmt (file
, use
->stmt
, 0, 0);
548 fprintf (file
, "\n");
550 fprintf (file
, " at position ");
552 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
553 fprintf (file
, "\n");
555 dump_iv (file
, use
->iv
);
557 if (use
->related_cands
)
559 fprintf (file
, " related candidates ");
560 dump_bitmap (file
, use
->related_cands
);
564 /* Dumps information about the uses to FILE. */
567 dump_uses (FILE *file
, struct ivopts_data
*data
)
572 for (i
= 0; i
< n_iv_uses (data
); i
++)
574 use
= iv_use (data
, i
);
576 dump_use (file
, use
);
577 fprintf (file
, "\n");
581 /* Dumps information about induction variable candidate CAND to FILE. */
584 dump_cand (FILE *file
, struct iv_cand
*cand
)
586 struct iv
*iv
= cand
->iv
;
588 fprintf (file
, "candidate %d%s\n",
589 cand
->id
, cand
->important
? " (important)" : "");
591 if (cand
->depends_on
)
593 fprintf (file
, " depends on ");
594 dump_bitmap (file
, cand
->depends_on
);
599 fprintf (file
, " final value replacement\n");
603 if (cand
->var_before
)
605 fprintf (file
, " var_before ");
606 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
607 fprintf (file
, "\n");
611 fprintf (file
, " var_after ");
612 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
613 fprintf (file
, "\n");
619 fprintf (file
, " incremented before exit test\n");
623 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
627 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
631 fprintf (file
, " incremented at end\n");
635 fprintf (file
, " original biv\n");
642 /* Returns the info for ssa version VER. */
644 static inline struct version_info
*
645 ver_info (struct ivopts_data
*data
, unsigned ver
)
647 return data
->version_info
+ ver
;
650 /* Returns the info for ssa name NAME. */
652 static inline struct version_info
*
653 name_info (struct ivopts_data
*data
, tree name
)
655 return ver_info (data
, SSA_NAME_VERSION (name
));
658 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
662 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
664 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
668 if (sbb
== loop
->latch
)
674 return stmt
== last_stmt (bb
);
677 /* Returns true if STMT if after the place where the original induction
678 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
679 if the positions are identical. */
682 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
684 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
685 basic_block stmt_bb
= gimple_bb (stmt
);
687 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
690 if (stmt_bb
!= cand_bb
)
694 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
696 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
699 /* Returns true if STMT if after the place where the induction variable
700 CAND is incremented in LOOP. */
703 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
711 return stmt_after_ip_normal_pos (loop
, stmt
);
715 return stmt_after_inc_pos (cand
, stmt
, false);
718 return stmt_after_inc_pos (cand
, stmt
, true);
725 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
728 abnormal_ssa_name_p (tree exp
)
733 if (TREE_CODE (exp
) != SSA_NAME
)
736 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
739 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
740 abnormal phi node. Callback for for_each_index. */
743 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
744 void *data ATTRIBUTE_UNUSED
)
746 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
748 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
750 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
754 return !abnormal_ssa_name_p (*index
);
757 /* Returns true if EXPR contains a ssa name that occurs in an
758 abnormal phi node. */
761 contains_abnormal_ssa_name_p (tree expr
)
764 enum tree_code_class codeclass
;
769 code
= TREE_CODE (expr
);
770 codeclass
= TREE_CODE_CLASS (code
);
772 if (code
== SSA_NAME
)
773 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
775 if (code
== INTEGER_CST
776 || is_gimple_min_invariant (expr
))
779 if (code
== ADDR_EXPR
)
780 return !for_each_index (&TREE_OPERAND (expr
, 0),
781 idx_contains_abnormal_ssa_name_p
,
784 if (code
== COND_EXPR
)
785 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
786 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
787 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
793 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
798 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
810 /* Returns the structure describing number of iterations determined from
811 EXIT of DATA->current_loop, or NULL if something goes wrong. */
813 static struct tree_niter_desc
*
814 niter_for_exit (struct ivopts_data
*data
, edge exit
)
816 struct tree_niter_desc
*desc
;
821 data
->niters
= pointer_map_create ();
825 slot
= pointer_map_contains (data
->niters
, exit
);
829 /* Try to determine number of iterations. We cannot safely work with ssa
830 names that appear in phi nodes on abnormal edges, so that we do not
831 create overlapping life ranges for them (PR 27283). */
832 desc
= XNEW (struct tree_niter_desc
);
833 if (!number_of_iterations_exit (data
->current_loop
,
835 || contains_abnormal_ssa_name_p (desc
->niter
))
840 slot
= pointer_map_insert (data
->niters
, exit
);
844 desc
= (struct tree_niter_desc
*) *slot
;
849 /* Returns the structure describing number of iterations determined from
850 single dominating exit of DATA->current_loop, or NULL if something
853 static struct tree_niter_desc
*
854 niter_for_single_dom_exit (struct ivopts_data
*data
)
856 edge exit
= single_dom_exit (data
->current_loop
);
861 return niter_for_exit (data
, exit
);
864 /* Initializes data structures used by the iv optimization pass, stored
868 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
870 data
->version_info_size
= 2 * num_ssa_names
;
871 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
872 data
->relevant
= BITMAP_ALLOC (NULL
);
873 data
->important_candidates
= BITMAP_ALLOC (NULL
);
874 data
->max_inv_id
= 0;
876 data
->iv_uses
.create (20);
877 data
->iv_candidates
.create (20);
878 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
879 data
->inv_expr_id
= 0;
880 decl_rtl_to_reset
.create (20);
883 /* Returns a memory object to that EXPR points. In case we are able to
884 determine that it does not point to any such object, NULL is returned. */
887 determine_base_object (tree expr
)
889 enum tree_code code
= TREE_CODE (expr
);
892 /* If this is a pointer casted to any type, we need to determine
893 the base object for the pointer; so handle conversions before
894 throwing away non-pointer expressions. */
895 if (CONVERT_EXPR_P (expr
))
896 return determine_base_object (TREE_OPERAND (expr
, 0));
898 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
907 obj
= TREE_OPERAND (expr
, 0);
908 base
= get_base_address (obj
);
913 if (TREE_CODE (base
) == MEM_REF
)
914 return determine_base_object (TREE_OPERAND (base
, 0));
916 return fold_convert (ptr_type_node
,
917 build_fold_addr_expr (base
));
919 case POINTER_PLUS_EXPR
:
920 return determine_base_object (TREE_OPERAND (expr
, 0));
924 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
928 return fold_convert (ptr_type_node
, expr
);
932 /* Return true if address expression with non-DECL_P operand appears
936 contain_complex_addr_expr (tree expr
)
941 switch (TREE_CODE (expr
))
943 case POINTER_PLUS_EXPR
:
946 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
947 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
951 return (!DECL_P (TREE_OPERAND (expr
, 0)));
960 /* Allocates an induction variable with given initial value BASE and step STEP
964 alloc_iv (tree base
, tree step
)
967 struct iv
*iv
= XCNEW (struct iv
);
968 gcc_assert (step
!= NULL_TREE
);
970 /* Lower address expression in base except ones with DECL_P as operand.
972 1) More accurate cost can be computed for address expressions;
973 2) Duplicate candidates won't be created for bases in different
974 forms, like &a[0] and &a. */
976 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
977 || contain_complex_addr_expr (expr
))
980 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
981 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
985 iv
->base_object
= determine_base_object (base
);
988 iv
->have_use_for
= false;
990 iv
->ssa_name
= NULL_TREE
;
995 /* Sets STEP and BASE for induction variable IV. */
998 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1000 struct version_info
*info
= name_info (data
, iv
);
1002 gcc_assert (!info
->iv
);
1004 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1005 info
->iv
= alloc_iv (base
, step
);
1006 info
->iv
->ssa_name
= iv
;
1009 /* Finds induction variable declaration for VAR. */
1012 get_iv (struct ivopts_data
*data
, tree var
)
1015 tree type
= TREE_TYPE (var
);
1017 if (!POINTER_TYPE_P (type
)
1018 && !INTEGRAL_TYPE_P (type
))
1021 if (!name_info (data
, var
)->iv
)
1023 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1026 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1027 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1030 return name_info (data
, var
)->iv
;
1033 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1034 not define a simple affine biv with nonzero step. */
1037 determine_biv_step (gimple phi
)
1039 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1040 tree name
= PHI_RESULT (phi
);
1043 if (virtual_operand_p (name
))
1046 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1049 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1052 /* Finds basic ivs. */
1055 find_bivs (struct ivopts_data
*data
)
1058 tree step
, type
, base
;
1060 struct loop
*loop
= data
->current_loop
;
1061 gimple_stmt_iterator psi
;
1063 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1065 phi
= gsi_stmt (psi
);
1067 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1070 step
= determine_biv_step (phi
);
1074 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1075 base
= expand_simple_operations (base
);
1076 if (contains_abnormal_ssa_name_p (base
)
1077 || contains_abnormal_ssa_name_p (step
))
1080 type
= TREE_TYPE (PHI_RESULT (phi
));
1081 base
= fold_convert (type
, base
);
1084 if (POINTER_TYPE_P (type
))
1085 step
= convert_to_ptrofftype (step
);
1087 step
= fold_convert (type
, step
);
1090 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1097 /* Marks basic ivs. */
1100 mark_bivs (struct ivopts_data
*data
)
1104 struct iv
*iv
, *incr_iv
;
1105 struct loop
*loop
= data
->current_loop
;
1106 basic_block incr_bb
;
1107 gimple_stmt_iterator psi
;
1109 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1111 phi
= gsi_stmt (psi
);
1113 iv
= get_iv (data
, PHI_RESULT (phi
));
1117 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1118 def
= SSA_NAME_DEF_STMT (var
);
1119 /* Don't mark iv peeled from other one as biv. */
1121 && gimple_code (def
) == GIMPLE_PHI
1122 && gimple_bb (def
) == loop
->header
)
1125 incr_iv
= get_iv (data
, var
);
1129 /* If the increment is in the subloop, ignore it. */
1130 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1131 if (incr_bb
->loop_father
!= data
->current_loop
1132 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1136 incr_iv
->biv_p
= true;
1140 /* Checks whether STMT defines a linear induction variable and stores its
1141 parameters to IV. */
1144 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1147 struct loop
*loop
= data
->current_loop
;
1149 iv
->base
= NULL_TREE
;
1150 iv
->step
= NULL_TREE
;
1152 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1155 lhs
= gimple_assign_lhs (stmt
);
1156 if (TREE_CODE (lhs
) != SSA_NAME
)
1159 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1161 iv
->base
= expand_simple_operations (iv
->base
);
1163 if (contains_abnormal_ssa_name_p (iv
->base
)
1164 || contains_abnormal_ssa_name_p (iv
->step
))
1167 /* If STMT could throw, then do not consider STMT as defining a GIV.
1168 While this will suppress optimizations, we can not safely delete this
1169 GIV and associated statements, even if it appears it is not used. */
1170 if (stmt_could_throw_p (stmt
))
1176 /* Finds general ivs in statement STMT. */
1179 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1183 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1186 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1189 /* Finds general ivs in basic block BB. */
1192 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1194 gimple_stmt_iterator bsi
;
1196 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1197 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1200 /* Finds general ivs. */
1203 find_givs (struct ivopts_data
*data
)
1205 struct loop
*loop
= data
->current_loop
;
1206 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1209 for (i
= 0; i
< loop
->num_nodes
; i
++)
1210 find_givs_in_bb (data
, body
[i
]);
1214 /* For each ssa name defined in LOOP determines whether it is an induction
1215 variable and if so, its initial value and step. */
1218 find_induction_variables (struct ivopts_data
*data
)
1223 if (!find_bivs (data
))
1229 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1231 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1235 fprintf (dump_file
, " number of iterations ");
1236 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1237 if (!integer_zerop (niter
->may_be_zero
))
1239 fprintf (dump_file
, "; zero if ");
1240 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1242 fprintf (dump_file
, "\n\n");
1245 fprintf (dump_file
, "Induction variables:\n\n");
1247 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1249 if (ver_info (data
, i
)->iv
)
1250 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1257 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1259 static struct iv_use
*
1260 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1261 gimple stmt
, enum use_type use_type
)
1263 struct iv_use
*use
= XCNEW (struct iv_use
);
1265 use
->id
= n_iv_uses (data
);
1266 use
->type
= use_type
;
1270 use
->related_cands
= BITMAP_ALLOC (NULL
);
1272 /* To avoid showing ssa name in the dumps, if it was not reset by the
1274 iv
->ssa_name
= NULL_TREE
;
1276 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1277 dump_use (dump_file
, use
);
1279 data
->iv_uses
.safe_push (use
);
1284 /* Checks whether OP is a loop-level invariant and if so, records it.
1285 NONLINEAR_USE is true if the invariant is used in a way we do not
1286 handle specially. */
1289 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1292 struct version_info
*info
;
1294 if (TREE_CODE (op
) != SSA_NAME
1295 || virtual_operand_p (op
))
1298 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1300 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1303 info
= name_info (data
, op
);
1305 info
->has_nonlin_use
|= nonlinear_use
;
1307 info
->inv_id
= ++data
->max_inv_id
;
1308 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1311 /* Checks whether the use OP is interesting and if so, records it. */
1313 static struct iv_use
*
1314 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1321 if (TREE_CODE (op
) != SSA_NAME
)
1324 iv
= get_iv (data
, op
);
1328 if (iv
->have_use_for
)
1330 use
= iv_use (data
, iv
->use_id
);
1332 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1336 if (integer_zerop (iv
->step
))
1338 record_invariant (data
, op
, true);
1341 iv
->have_use_for
= true;
1343 civ
= XNEW (struct iv
);
1346 stmt
= SSA_NAME_DEF_STMT (op
);
1347 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1348 || is_gimple_assign (stmt
));
1350 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1351 iv
->use_id
= use
->id
;
1356 /* Given a condition in statement STMT, checks whether it is a compare
1357 of an induction variable and an invariant. If this is the case,
1358 CONTROL_VAR is set to location of the iv, BOUND to the location of
1359 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1360 induction variable descriptions, and true is returned. If this is not
1361 the case, CONTROL_VAR and BOUND are set to the arguments of the
1362 condition and false is returned. */
1365 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1366 tree
**control_var
, tree
**bound
,
1367 struct iv
**iv_var
, struct iv
**iv_bound
)
1369 /* The objects returned when COND has constant operands. */
1370 static struct iv const_iv
;
1372 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1373 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1376 if (gimple_code (stmt
) == GIMPLE_COND
)
1378 op0
= gimple_cond_lhs_ptr (stmt
);
1379 op1
= gimple_cond_rhs_ptr (stmt
);
1383 op0
= gimple_assign_rhs1_ptr (stmt
);
1384 op1
= gimple_assign_rhs2_ptr (stmt
);
1387 zero
= integer_zero_node
;
1388 const_iv
.step
= integer_zero_node
;
1390 if (TREE_CODE (*op0
) == SSA_NAME
)
1391 iv0
= get_iv (data
, *op0
);
1392 if (TREE_CODE (*op1
) == SSA_NAME
)
1393 iv1
= get_iv (data
, *op1
);
1395 /* Exactly one of the compared values must be an iv, and the other one must
1400 if (integer_zerop (iv0
->step
))
1402 /* Control variable may be on the other side. */
1403 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1404 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1406 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1410 *control_var
= op0
;;
1421 /* Checks whether the condition in STMT is interesting and if so,
1425 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1427 tree
*var_p
, *bound_p
;
1428 struct iv
*var_iv
, *civ
;
1430 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1432 find_interesting_uses_op (data
, *var_p
);
1433 find_interesting_uses_op (data
, *bound_p
);
1437 civ
= XNEW (struct iv
);
1439 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1442 /* Returns the outermost loop EXPR is obviously invariant in
1443 relative to the loop LOOP, i.e. if all its operands are defined
1444 outside of the returned loop. Returns NULL if EXPR is not
1445 even obviously invariant in LOOP. */
1448 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1453 if (is_gimple_min_invariant (expr
))
1454 return current_loops
->tree_root
;
1456 if (TREE_CODE (expr
) == SSA_NAME
)
1458 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1461 if (flow_bb_inside_loop_p (loop
, def_bb
))
1463 return superloop_at_depth (loop
,
1464 loop_depth (def_bb
->loop_father
) + 1);
1467 return current_loops
->tree_root
;
1473 unsigned maxdepth
= 0;
1474 len
= TREE_OPERAND_LENGTH (expr
);
1475 for (i
= 0; i
< len
; i
++)
1477 struct loop
*ivloop
;
1478 if (!TREE_OPERAND (expr
, i
))
1481 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1484 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1487 return superloop_at_depth (loop
, maxdepth
);
1490 /* Returns true if expression EXPR is obviously invariant in LOOP,
1491 i.e. if all its operands are defined outside of the LOOP. LOOP
1492 should not be the function body. */
1495 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1500 gcc_assert (loop_depth (loop
) > 0);
1502 if (is_gimple_min_invariant (expr
))
1505 if (TREE_CODE (expr
) == SSA_NAME
)
1507 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1509 && flow_bb_inside_loop_p (loop
, def_bb
))
1518 len
= TREE_OPERAND_LENGTH (expr
);
1519 for (i
= 0; i
< len
; i
++)
1520 if (TREE_OPERAND (expr
, i
)
1521 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1527 /* Cumulates the steps of indices into DATA and replaces their values with the
1528 initial ones. Returns false when the value of the index cannot be determined.
1529 Callback for for_each_index. */
1531 struct ifs_ivopts_data
1533 struct ivopts_data
*ivopts_data
;
1539 idx_find_step (tree base
, tree
*idx
, void *data
)
1541 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1543 tree step
, iv_base
, iv_step
, lbound
, off
;
1544 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1546 /* If base is a component ref, require that the offset of the reference
1548 if (TREE_CODE (base
) == COMPONENT_REF
)
1550 off
= component_ref_field_offset (base
);
1551 return expr_invariant_in_loop_p (loop
, off
);
1554 /* If base is array, first check whether we will be able to move the
1555 reference out of the loop (in order to take its address in strength
1556 reduction). In order for this to work we need both lower bound
1557 and step to be loop invariants. */
1558 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1560 /* Moreover, for a range, the size needs to be invariant as well. */
1561 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1562 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1565 step
= array_ref_element_size (base
);
1566 lbound
= array_ref_low_bound (base
);
1568 if (!expr_invariant_in_loop_p (loop
, step
)
1569 || !expr_invariant_in_loop_p (loop
, lbound
))
1573 if (TREE_CODE (*idx
) != SSA_NAME
)
1576 iv
= get_iv (dta
->ivopts_data
, *idx
);
1580 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1581 *&x[0], which is not folded and does not trigger the
1582 ARRAY_REF path below. */
1585 if (integer_zerop (iv
->step
))
1588 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1590 step
= array_ref_element_size (base
);
1592 /* We only handle addresses whose step is an integer constant. */
1593 if (TREE_CODE (step
) != INTEGER_CST
)
1597 /* The step for pointer arithmetics already is 1 byte. */
1598 step
= size_one_node
;
1602 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1603 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1606 /* The index might wrap. */
1610 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1611 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1616 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1617 object is passed to it in DATA. */
1620 idx_record_use (tree base
, tree
*idx
,
1623 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1624 find_interesting_uses_op (data
, *idx
);
1625 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1627 find_interesting_uses_op (data
, array_ref_element_size (base
));
1628 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1633 /* If we can prove that TOP = cst * BOT for some constant cst,
1634 store cst to MUL and return true. Otherwise return false.
1635 The returned value is always sign-extended, regardless of the
1636 signedness of TOP and BOT. */
1639 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1642 enum tree_code code
;
1643 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1644 widest_int res
, p0
, p1
;
1649 if (operand_equal_p (top
, bot
, 0))
1655 code
= TREE_CODE (top
);
1659 mby
= TREE_OPERAND (top
, 1);
1660 if (TREE_CODE (mby
) != INTEGER_CST
)
1663 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1666 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1671 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1672 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1675 if (code
== MINUS_EXPR
)
1677 *mul
= wi::sext (p0
+ p1
, precision
);
1681 if (TREE_CODE (bot
) != INTEGER_CST
)
1684 p0
= widest_int::from (top
, SIGNED
);
1685 p1
= widest_int::from (bot
, SIGNED
);
1688 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1696 /* Return true if memory reference REF with step STEP may be unaligned. */
1699 may_be_unaligned_p (tree ref
, tree step
)
1701 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1702 thus they are not misaligned. */
1703 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1706 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1708 unsigned HOST_WIDE_INT bitpos
;
1709 unsigned int ref_align
;
1710 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1711 if (ref_align
< align
1712 || (bitpos
% align
) != 0
1713 || (bitpos
% BITS_PER_UNIT
) != 0)
1716 unsigned int trailing_zeros
= tree_ctz (step
);
1717 if (trailing_zeros
< HOST_BITS_PER_INT
1718 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1724 /* Return true if EXPR may be non-addressable. */
1727 may_be_nonaddressable_p (tree expr
)
1729 switch (TREE_CODE (expr
))
1731 case TARGET_MEM_REF
:
1732 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1733 target, thus they are always addressable. */
1737 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1738 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1740 case VIEW_CONVERT_EXPR
:
1741 /* This kind of view-conversions may wrap non-addressable objects
1742 and make them look addressable. After some processing the
1743 non-addressability may be uncovered again, causing ADDR_EXPRs
1744 of inappropriate objects to be built. */
1745 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1746 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1749 /* ... fall through ... */
1752 case ARRAY_RANGE_REF
:
1753 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1765 /* Finds addresses in *OP_P inside STMT. */
1768 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1770 tree base
= *op_p
, step
= size_zero_node
;
1772 struct ifs_ivopts_data ifs_ivopts_data
;
1774 /* Do not play with volatile memory references. A bit too conservative,
1775 perhaps, but safe. */
1776 if (gimple_has_volatile_ops (stmt
))
1779 /* Ignore bitfields for now. Not really something terribly complicated
1781 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1784 base
= unshare_expr (base
);
1786 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1788 tree type
= build_pointer_type (TREE_TYPE (base
));
1792 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1794 civ
= get_iv (data
, TMR_BASE (base
));
1798 TMR_BASE (base
) = civ
->base
;
1801 if (TMR_INDEX2 (base
)
1802 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1804 civ
= get_iv (data
, TMR_INDEX2 (base
));
1808 TMR_INDEX2 (base
) = civ
->base
;
1811 if (TMR_INDEX (base
)
1812 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1814 civ
= get_iv (data
, TMR_INDEX (base
));
1818 TMR_INDEX (base
) = civ
->base
;
1823 if (TMR_STEP (base
))
1824 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1826 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1830 if (integer_zerop (step
))
1832 base
= tree_mem_ref_addr (type
, base
);
1836 ifs_ivopts_data
.ivopts_data
= data
;
1837 ifs_ivopts_data
.stmt
= stmt
;
1838 ifs_ivopts_data
.step
= size_zero_node
;
1839 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1840 || integer_zerop (ifs_ivopts_data
.step
))
1842 step
= ifs_ivopts_data
.step
;
1844 /* Check that the base expression is addressable. This needs
1845 to be done after substituting bases of IVs into it. */
1846 if (may_be_nonaddressable_p (base
))
1849 /* Moreover, on strict alignment platforms, check that it is
1850 sufficiently aligned. */
1851 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1854 base
= build_fold_addr_expr (base
);
1856 /* Substituting bases of IVs into the base expression might
1857 have caused folding opportunities. */
1858 if (TREE_CODE (base
) == ADDR_EXPR
)
1860 tree
*ref
= &TREE_OPERAND (base
, 0);
1861 while (handled_component_p (*ref
))
1862 ref
= &TREE_OPERAND (*ref
, 0);
1863 if (TREE_CODE (*ref
) == MEM_REF
)
1865 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1866 TREE_OPERAND (*ref
, 0),
1867 TREE_OPERAND (*ref
, 1));
1874 civ
= alloc_iv (base
, step
);
1875 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1879 for_each_index (op_p
, idx_record_use
, data
);
1882 /* Finds and records invariants used in STMT. */
1885 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1888 use_operand_p use_p
;
1891 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1893 op
= USE_FROM_PTR (use_p
);
1894 record_invariant (data
, op
, false);
1898 /* Finds interesting uses of induction variables in the statement STMT. */
1901 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1904 tree op
, *lhs
, *rhs
;
1906 use_operand_p use_p
;
1907 enum tree_code code
;
1909 find_invariants_stmt (data
, stmt
);
1911 if (gimple_code (stmt
) == GIMPLE_COND
)
1913 find_interesting_uses_cond (data
, stmt
);
1917 if (is_gimple_assign (stmt
))
1919 lhs
= gimple_assign_lhs_ptr (stmt
);
1920 rhs
= gimple_assign_rhs1_ptr (stmt
);
1922 if (TREE_CODE (*lhs
) == SSA_NAME
)
1924 /* If the statement defines an induction variable, the uses are not
1925 interesting by themselves. */
1927 iv
= get_iv (data
, *lhs
);
1929 if (iv
&& !integer_zerop (iv
->step
))
1933 code
= gimple_assign_rhs_code (stmt
);
1934 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1935 && (REFERENCE_CLASS_P (*rhs
)
1936 || is_gimple_val (*rhs
)))
1938 if (REFERENCE_CLASS_P (*rhs
))
1939 find_interesting_uses_address (data
, stmt
, rhs
);
1941 find_interesting_uses_op (data
, *rhs
);
1943 if (REFERENCE_CLASS_P (*lhs
))
1944 find_interesting_uses_address (data
, stmt
, lhs
);
1947 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1949 find_interesting_uses_cond (data
, stmt
);
1953 /* TODO -- we should also handle address uses of type
1955 memory = call (whatever);
1962 if (gimple_code (stmt
) == GIMPLE_PHI
1963 && gimple_bb (stmt
) == data
->current_loop
->header
)
1965 iv
= get_iv (data
, PHI_RESULT (stmt
));
1967 if (iv
&& !integer_zerop (iv
->step
))
1971 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1973 op
= USE_FROM_PTR (use_p
);
1975 if (TREE_CODE (op
) != SSA_NAME
)
1978 iv
= get_iv (data
, op
);
1982 find_interesting_uses_op (data
, op
);
1986 /* Finds interesting uses of induction variables outside of loops
1987 on loop exit edge EXIT. */
1990 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1993 gimple_stmt_iterator psi
;
1996 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1998 phi
= gsi_stmt (psi
);
1999 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2000 if (!virtual_operand_p (def
))
2001 find_interesting_uses_op (data
, def
);
2005 /* Finds uses of the induction variables that are interesting. */
2008 find_interesting_uses (struct ivopts_data
*data
)
2011 gimple_stmt_iterator bsi
;
2012 basic_block
*body
= get_loop_body (data
->current_loop
);
2014 struct version_info
*info
;
2017 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2018 fprintf (dump_file
, "Uses:\n\n");
2020 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2025 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2026 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2027 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2028 find_interesting_uses_outside (data
, e
);
2030 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2031 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2032 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2033 if (!is_gimple_debug (gsi_stmt (bsi
)))
2034 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2037 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2041 fprintf (dump_file
, "\n");
2043 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2045 info
= ver_info (data
, i
);
2048 fprintf (dump_file
, " ");
2049 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2050 fprintf (dump_file
, " is invariant (%d)%s\n",
2051 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2055 fprintf (dump_file
, "\n");
2061 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2062 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2063 we are at the top-level of the processed address. */
2066 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2067 HOST_WIDE_INT
*offset
)
2069 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2070 enum tree_code code
;
2071 tree type
, orig_type
= TREE_TYPE (expr
);
2072 HOST_WIDE_INT off0
, off1
, st
;
2073 tree orig_expr
= expr
;
2077 type
= TREE_TYPE (expr
);
2078 code
= TREE_CODE (expr
);
2084 if (!cst_and_fits_in_hwi (expr
)
2085 || integer_zerop (expr
))
2088 *offset
= int_cst_value (expr
);
2089 return build_int_cst (orig_type
, 0);
2091 case POINTER_PLUS_EXPR
:
2094 op0
= TREE_OPERAND (expr
, 0);
2095 op1
= TREE_OPERAND (expr
, 1);
2097 op0
= strip_offset_1 (op0
, false, false, &off0
);
2098 op1
= strip_offset_1 (op1
, false, false, &off1
);
2100 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2101 if (op0
== TREE_OPERAND (expr
, 0)
2102 && op1
== TREE_OPERAND (expr
, 1))
2105 if (integer_zerop (op1
))
2107 else if (integer_zerop (op0
))
2109 if (code
== MINUS_EXPR
)
2110 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2115 expr
= fold_build2 (code
, type
, op0
, op1
);
2117 return fold_convert (orig_type
, expr
);
2120 op1
= TREE_OPERAND (expr
, 1);
2121 if (!cst_and_fits_in_hwi (op1
))
2124 op0
= TREE_OPERAND (expr
, 0);
2125 op0
= strip_offset_1 (op0
, false, false, &off0
);
2126 if (op0
== TREE_OPERAND (expr
, 0))
2129 *offset
= off0
* int_cst_value (op1
);
2130 if (integer_zerop (op0
))
2133 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2135 return fold_convert (orig_type
, expr
);
2138 case ARRAY_RANGE_REF
:
2142 step
= array_ref_element_size (expr
);
2143 if (!cst_and_fits_in_hwi (step
))
2146 st
= int_cst_value (step
);
2147 op1
= TREE_OPERAND (expr
, 1);
2148 op1
= strip_offset_1 (op1
, false, false, &off1
);
2149 *offset
= off1
* st
;
2152 && integer_zerop (op1
))
2154 /* Strip the component reference completely. */
2155 op0
= TREE_OPERAND (expr
, 0);
2156 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2169 tmp
= component_ref_field_offset (expr
);
2170 field
= TREE_OPERAND (expr
, 1);
2172 && cst_and_fits_in_hwi (tmp
)
2173 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2175 HOST_WIDE_INT boffset
, abs_off
;
2177 /* Strip the component reference completely. */
2178 op0
= TREE_OPERAND (expr
, 0);
2179 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2180 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2181 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2185 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2192 op0
= TREE_OPERAND (expr
, 0);
2193 op0
= strip_offset_1 (op0
, true, true, &off0
);
2196 if (op0
== TREE_OPERAND (expr
, 0))
2199 expr
= build_fold_addr_expr (op0
);
2200 return fold_convert (orig_type
, expr
);
2203 /* ??? Offset operand? */
2204 inside_addr
= false;
2211 /* Default handling of expressions for that we want to recurse into
2212 the first operand. */
2213 op0
= TREE_OPERAND (expr
, 0);
2214 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2217 if (op0
== TREE_OPERAND (expr
, 0)
2218 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2221 expr
= copy_node (expr
);
2222 TREE_OPERAND (expr
, 0) = op0
;
2224 TREE_OPERAND (expr
, 1) = op1
;
2226 /* Inside address, we might strip the top level component references,
2227 thus changing type of the expression. Handling of ADDR_EXPR
2229 expr
= fold_convert (orig_type
, expr
);
2234 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2237 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2240 tree core
= strip_offset_1 (expr
, false, false, &off
);
2245 /* Returns variant of TYPE that can be used as base for different uses.
2246 We return unsigned type with the same precision, which avoids problems
2250 generic_type_for (tree type
)
2252 if (POINTER_TYPE_P (type
))
2253 return unsigned_type_for (type
);
2255 if (TYPE_UNSIGNED (type
))
2258 return unsigned_type_for (type
);
2261 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2262 the bitmap to that we should store it. */
2264 static struct ivopts_data
*fd_ivopts_data
;
2266 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2268 bitmap
*depends_on
= (bitmap
*) data
;
2269 struct version_info
*info
;
2271 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2273 info
= name_info (fd_ivopts_data
, *expr_p
);
2275 if (!info
->inv_id
|| info
->has_nonlin_use
)
2279 *depends_on
= BITMAP_ALLOC (NULL
);
2280 bitmap_set_bit (*depends_on
, info
->inv_id
);
2285 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2286 position to POS. If USE is not NULL, the candidate is set as related to
2287 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2288 replacement of the final value of the iv by a direct computation. */
2290 static struct iv_cand
*
2291 add_candidate_1 (struct ivopts_data
*data
,
2292 tree base
, tree step
, bool important
, enum iv_position pos
,
2293 struct iv_use
*use
, gimple incremented_at
)
2296 struct iv_cand
*cand
= NULL
;
2297 tree type
, orig_type
;
2299 /* For non-original variables, make sure their values are computed in a type
2300 that does not invoke undefined behavior on overflows (since in general,
2301 we cannot prove that these induction variables are non-wrapping). */
2302 if (pos
!= IP_ORIGINAL
)
2304 orig_type
= TREE_TYPE (base
);
2305 type
= generic_type_for (orig_type
);
2306 if (type
!= orig_type
)
2308 base
= fold_convert (type
, base
);
2309 step
= fold_convert (type
, step
);
2313 for (i
= 0; i
< n_iv_cands (data
); i
++)
2315 cand
= iv_cand (data
, i
);
2317 if (cand
->pos
!= pos
)
2320 if (cand
->incremented_at
!= incremented_at
2321 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2322 && cand
->ainc_use
!= use
))
2336 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2337 && operand_equal_p (step
, cand
->iv
->step
, 0)
2338 && (TYPE_PRECISION (TREE_TYPE (base
))
2339 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2343 if (i
== n_iv_cands (data
))
2345 cand
= XCNEW (struct iv_cand
);
2351 cand
->iv
= alloc_iv (base
, step
);
2354 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2356 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2357 cand
->var_after
= cand
->var_before
;
2359 cand
->important
= important
;
2360 cand
->incremented_at
= incremented_at
;
2361 data
->iv_candidates
.safe_push (cand
);
2364 && TREE_CODE (step
) != INTEGER_CST
)
2366 fd_ivopts_data
= data
;
2367 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2370 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2371 cand
->ainc_use
= use
;
2373 cand
->ainc_use
= NULL
;
2375 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2376 dump_cand (dump_file
, cand
);
2379 if (important
&& !cand
->important
)
2381 cand
->important
= true;
2382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2383 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2388 bitmap_set_bit (use
->related_cands
, i
);
2389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2390 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2397 /* Returns true if incrementing the induction variable at the end of the LOOP
2400 The purpose is to avoid splitting latch edge with a biv increment, thus
2401 creating a jump, possibly confusing other optimization passes and leaving
2402 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2403 is not available (so we do not have a better alternative), or if the latch
2404 edge is already nonempty. */
2407 allow_ip_end_pos_p (struct loop
*loop
)
2409 if (!ip_normal_pos (loop
))
2412 if (!empty_block_p (ip_end_pos (loop
)))
2418 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2419 Important field is set to IMPORTANT. */
2422 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2423 bool important
, struct iv_use
*use
)
2425 basic_block use_bb
= gimple_bb (use
->stmt
);
2426 enum machine_mode mem_mode
;
2427 unsigned HOST_WIDE_INT cstepi
;
2429 /* If we insert the increment in any position other than the standard
2430 ones, we must ensure that it is incremented once per iteration.
2431 It must not be in an inner nested loop, or one side of an if
2433 if (use_bb
->loop_father
!= data
->current_loop
2434 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2435 || stmt_could_throw_p (use
->stmt
)
2436 || !cst_and_fits_in_hwi (step
))
2439 cstepi
= int_cst_value (step
);
2441 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2442 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2443 || USE_STORE_PRE_INCREMENT (mem_mode
))
2444 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2445 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2446 || USE_STORE_PRE_DECREMENT (mem_mode
))
2447 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2449 enum tree_code code
= MINUS_EXPR
;
2451 tree new_step
= step
;
2453 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2455 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2456 code
= POINTER_PLUS_EXPR
;
2459 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2460 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2461 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2464 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2465 || USE_STORE_POST_INCREMENT (mem_mode
))
2466 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2467 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2468 || USE_STORE_POST_DECREMENT (mem_mode
))
2469 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2471 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2476 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2477 position to POS. If USE is not NULL, the candidate is set as related to
2478 it. The candidate computation is scheduled on all available positions. */
2481 add_candidate (struct ivopts_data
*data
,
2482 tree base
, tree step
, bool important
, struct iv_use
*use
)
2484 if (ip_normal_pos (data
->current_loop
))
2485 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2486 if (ip_end_pos (data
->current_loop
)
2487 && allow_ip_end_pos_p (data
->current_loop
))
2488 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2490 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2491 add_autoinc_candidates (data
, base
, step
, important
, use
);
2494 /* Adds standard iv candidates. */
2497 add_standard_iv_candidates (struct ivopts_data
*data
)
2499 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2501 /* The same for a double-integer type if it is still fast enough. */
2503 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2504 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2505 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2506 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2508 /* The same for a double-integer type if it is still fast enough. */
2510 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2511 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2512 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2513 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2517 /* Adds candidates bases on the old induction variable IV. */
2520 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2524 struct iv_cand
*cand
;
2526 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2528 /* The same, but with initial value zero. */
2529 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2530 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2532 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2533 iv
->step
, true, NULL
);
2535 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2536 if (gimple_code (phi
) == GIMPLE_PHI
)
2538 /* Additionally record the possibility of leaving the original iv
2540 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2541 /* Don't add candidate if it's from another PHI node because
2542 it's an affine iv appearing in the form of PEELED_CHREC. */
2543 phi
= SSA_NAME_DEF_STMT (def
);
2544 if (gimple_code (phi
) != GIMPLE_PHI
)
2546 cand
= add_candidate_1 (data
,
2547 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2548 SSA_NAME_DEF_STMT (def
));
2549 cand
->var_before
= iv
->ssa_name
;
2550 cand
->var_after
= def
;
2553 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2557 /* Adds candidates based on the old induction variables. */
2560 add_old_ivs_candidates (struct ivopts_data
*data
)
2566 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2568 iv
= ver_info (data
, i
)->iv
;
2569 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2570 add_old_iv_candidates (data
, iv
);
2574 /* Adds candidates based on the value of the induction variable IV and USE. */
2577 add_iv_value_candidates (struct ivopts_data
*data
,
2578 struct iv
*iv
, struct iv_use
*use
)
2580 unsigned HOST_WIDE_INT offset
;
2584 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2586 /* The same, but with initial value zero. Make such variable important,
2587 since it is generic enough so that possibly many uses may be based
2589 basetype
= TREE_TYPE (iv
->base
);
2590 if (POINTER_TYPE_P (basetype
))
2591 basetype
= sizetype
;
2592 add_candidate (data
, build_int_cst (basetype
, 0),
2593 iv
->step
, true, use
);
2595 /* Third, try removing the constant offset. Make sure to even
2596 add a candidate for &a[0] vs. (T *)&a. */
2597 base
= strip_offset (iv
->base
, &offset
);
2599 || base
!= iv
->base
)
2600 add_candidate (data
, base
, iv
->step
, false, use
);
2603 /* Adds candidates based on the uses. */
2606 add_derived_ivs_candidates (struct ivopts_data
*data
)
2610 for (i
= 0; i
< n_iv_uses (data
); i
++)
2612 struct iv_use
*use
= iv_use (data
, i
);
2619 case USE_NONLINEAR_EXPR
:
2622 /* Just add the ivs based on the value of the iv used here. */
2623 add_iv_value_candidates (data
, use
->iv
, use
);
2632 /* Record important candidates and add them to related_cands bitmaps
2636 record_important_candidates (struct ivopts_data
*data
)
2641 for (i
= 0; i
< n_iv_cands (data
); i
++)
2643 struct iv_cand
*cand
= iv_cand (data
, i
);
2645 if (cand
->important
)
2646 bitmap_set_bit (data
->important_candidates
, i
);
2649 data
->consider_all_candidates
= (n_iv_cands (data
)
2650 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2652 if (data
->consider_all_candidates
)
2654 /* We will not need "related_cands" bitmaps in this case,
2655 so release them to decrease peak memory consumption. */
2656 for (i
= 0; i
< n_iv_uses (data
); i
++)
2658 use
= iv_use (data
, i
);
2659 BITMAP_FREE (use
->related_cands
);
2664 /* Add important candidates to the related_cands bitmaps. */
2665 for (i
= 0; i
< n_iv_uses (data
); i
++)
2666 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2667 data
->important_candidates
);
2671 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2672 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2673 we allocate a simple list to every use. */
2676 alloc_use_cost_map (struct ivopts_data
*data
)
2678 unsigned i
, size
, s
;
2680 for (i
= 0; i
< n_iv_uses (data
); i
++)
2682 struct iv_use
*use
= iv_use (data
, i
);
2684 if (data
->consider_all_candidates
)
2685 size
= n_iv_cands (data
);
2688 s
= bitmap_count_bits (use
->related_cands
);
2690 /* Round up to the power of two, so that moduling by it is fast. */
2691 size
= s
? (1 << ceil_log2 (s
)) : 1;
2694 use
->n_map_members
= size
;
2695 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2699 /* Returns description of computation cost of expression whose runtime
2700 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2703 new_cost (unsigned runtime
, unsigned complexity
)
2707 cost
.cost
= runtime
;
2708 cost
.complexity
= complexity
;
2713 /* Adds costs COST1 and COST2. */
2716 add_costs (comp_cost cost1
, comp_cost cost2
)
2718 cost1
.cost
+= cost2
.cost
;
2719 cost1
.complexity
+= cost2
.complexity
;
2723 /* Subtracts costs COST1 and COST2. */
2726 sub_costs (comp_cost cost1
, comp_cost cost2
)
2728 cost1
.cost
-= cost2
.cost
;
2729 cost1
.complexity
-= cost2
.complexity
;
2734 /* Returns a negative number if COST1 < COST2, a positive number if
2735 COST1 > COST2, and 0 if COST1 = COST2. */
2738 compare_costs (comp_cost cost1
, comp_cost cost2
)
2740 if (cost1
.cost
== cost2
.cost
)
2741 return cost1
.complexity
- cost2
.complexity
;
2743 return cost1
.cost
- cost2
.cost
;
2746 /* Returns true if COST is infinite. */
2749 infinite_cost_p (comp_cost cost
)
2751 return cost
.cost
== INFTY
;
2754 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2755 on invariants DEPENDS_ON and that the value used in expressing it
2756 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2759 set_use_iv_cost (struct ivopts_data
*data
,
2760 struct iv_use
*use
, struct iv_cand
*cand
,
2761 comp_cost cost
, bitmap depends_on
, tree value
,
2762 enum tree_code comp
, int inv_expr_id
)
2766 if (infinite_cost_p (cost
))
2768 BITMAP_FREE (depends_on
);
2772 if (data
->consider_all_candidates
)
2774 use
->cost_map
[cand
->id
].cand
= cand
;
2775 use
->cost_map
[cand
->id
].cost
= cost
;
2776 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2777 use
->cost_map
[cand
->id
].value
= value
;
2778 use
->cost_map
[cand
->id
].comp
= comp
;
2779 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2783 /* n_map_members is a power of two, so this computes modulo. */
2784 s
= cand
->id
& (use
->n_map_members
- 1);
2785 for (i
= s
; i
< use
->n_map_members
; i
++)
2786 if (!use
->cost_map
[i
].cand
)
2788 for (i
= 0; i
< s
; i
++)
2789 if (!use
->cost_map
[i
].cand
)
2795 use
->cost_map
[i
].cand
= cand
;
2796 use
->cost_map
[i
].cost
= cost
;
2797 use
->cost_map
[i
].depends_on
= depends_on
;
2798 use
->cost_map
[i
].value
= value
;
2799 use
->cost_map
[i
].comp
= comp
;
2800 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2803 /* Gets cost of (USE, CANDIDATE) pair. */
2805 static struct cost_pair
*
2806 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2807 struct iv_cand
*cand
)
2810 struct cost_pair
*ret
;
2815 if (data
->consider_all_candidates
)
2817 ret
= use
->cost_map
+ cand
->id
;
2824 /* n_map_members is a power of two, so this computes modulo. */
2825 s
= cand
->id
& (use
->n_map_members
- 1);
2826 for (i
= s
; i
< use
->n_map_members
; i
++)
2827 if (use
->cost_map
[i
].cand
== cand
)
2828 return use
->cost_map
+ i
;
2829 else if (use
->cost_map
[i
].cand
== NULL
)
2831 for (i
= 0; i
< s
; i
++)
2832 if (use
->cost_map
[i
].cand
== cand
)
2833 return use
->cost_map
+ i
;
2834 else if (use
->cost_map
[i
].cand
== NULL
)
2840 /* Returns estimate on cost of computing SEQ. */
2843 seq_cost (rtx seq
, bool speed
)
2848 for (; seq
; seq
= NEXT_INSN (seq
))
2850 set
= single_set (seq
);
2852 cost
+= set_src_cost (SET_SRC (set
), speed
);
2860 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2862 produce_memory_decl_rtl (tree obj
, int *regno
)
2864 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2865 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2869 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2871 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2872 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2873 SET_SYMBOL_REF_DECL (x
, obj
);
2874 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2875 set_mem_addr_space (x
, as
);
2876 targetm
.encode_section_info (obj
, x
, true);
2880 x
= gen_raw_REG (address_mode
, (*regno
)++);
2881 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2882 set_mem_addr_space (x
, as
);
2888 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2889 walk_tree. DATA contains the actual fake register number. */
2892 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2894 tree obj
= NULL_TREE
;
2896 int *regno
= (int *) data
;
2898 switch (TREE_CODE (*expr_p
))
2901 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2902 handled_component_p (*expr_p
);
2903 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2906 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2907 x
= produce_memory_decl_rtl (obj
, regno
);
2912 obj
= SSA_NAME_VAR (*expr_p
);
2913 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2916 if (!DECL_RTL_SET_P (obj
))
2917 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2926 if (DECL_RTL_SET_P (obj
))
2929 if (DECL_MODE (obj
) == BLKmode
)
2930 x
= produce_memory_decl_rtl (obj
, regno
);
2932 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2942 decl_rtl_to_reset
.safe_push (obj
);
2943 SET_DECL_RTL (obj
, x
);
2949 /* Determines cost of the computation of EXPR. */
2952 computation_cost (tree expr
, bool speed
)
2955 tree type
= TREE_TYPE (expr
);
2957 /* Avoid using hard regs in ways which may be unsupported. */
2958 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2959 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2960 enum node_frequency real_frequency
= node
->frequency
;
2962 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2963 crtl
->maybe_hot_insn_p
= speed
;
2964 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2966 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2969 default_rtl_profile ();
2970 node
->frequency
= real_frequency
;
2972 cost
= seq_cost (seq
, speed
);
2974 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2975 TYPE_ADDR_SPACE (type
), speed
);
2976 else if (!REG_P (rslt
))
2977 cost
+= set_src_cost (rslt
, speed
);
2982 /* Returns variable containing the value of candidate CAND at statement AT. */
2985 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2987 if (stmt_after_increment (loop
, cand
, stmt
))
2988 return cand
->var_after
;
2990 return cand
->var_before
;
2993 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2994 same precision that is at least as wide as the precision of TYPE, stores
2995 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2999 determine_common_wider_type (tree
*a
, tree
*b
)
3001 tree wider_type
= NULL
;
3003 tree atype
= TREE_TYPE (*a
);
3005 if (CONVERT_EXPR_P (*a
))
3007 suba
= TREE_OPERAND (*a
, 0);
3008 wider_type
= TREE_TYPE (suba
);
3009 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3015 if (CONVERT_EXPR_P (*b
))
3017 subb
= TREE_OPERAND (*b
, 0);
3018 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3029 /* Determines the expression by that USE is expressed from induction variable
3030 CAND at statement AT in LOOP. The expression is stored in a decomposed
3031 form into AFF. Returns false if USE cannot be expressed using CAND. */
3034 get_computation_aff (struct loop
*loop
,
3035 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3036 struct aff_tree
*aff
)
3038 tree ubase
= use
->iv
->base
;
3039 tree ustep
= use
->iv
->step
;
3040 tree cbase
= cand
->iv
->base
;
3041 tree cstep
= cand
->iv
->step
, cstep_common
;
3042 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3043 tree common_type
, var
;
3045 aff_tree cbase_aff
, var_aff
;
3048 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3050 /* We do not have a precision to express the values of use. */
3054 var
= var_at_stmt (loop
, cand
, at
);
3055 uutype
= unsigned_type_for (utype
);
3057 /* If the conversion is not noop, perform it. */
3058 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3060 cstep
= fold_convert (uutype
, cstep
);
3061 cbase
= fold_convert (uutype
, cbase
);
3062 var
= fold_convert (uutype
, var
);
3065 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3068 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3069 type, we achieve better folding by computing their difference in this
3070 wider type, and cast the result to UUTYPE. We do not need to worry about
3071 overflows, as all the arithmetics will in the end be performed in UUTYPE
3073 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3075 /* use = ubase - ratio * cbase + ratio * var. */
3076 tree_to_aff_combination (ubase
, common_type
, aff
);
3077 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3078 tree_to_aff_combination (var
, uutype
, &var_aff
);
3080 /* We need to shift the value if we are after the increment. */
3081 if (stmt_after_increment (loop
, cand
, at
))
3085 if (common_type
!= uutype
)
3086 cstep_common
= fold_convert (common_type
, cstep
);
3088 cstep_common
= cstep
;
3090 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3091 aff_combination_add (&cbase_aff
, &cstep_aff
);
3094 aff_combination_scale (&cbase_aff
, -rat
);
3095 aff_combination_add (aff
, &cbase_aff
);
3096 if (common_type
!= uutype
)
3097 aff_combination_convert (aff
, uutype
);
3099 aff_combination_scale (&var_aff
, rat
);
3100 aff_combination_add (aff
, &var_aff
);
3105 /* Return the type of USE. */
3108 get_use_type (struct iv_use
*use
)
3110 tree base_type
= TREE_TYPE (use
->iv
->base
);
3113 if (use
->type
== USE_ADDRESS
)
3115 /* The base_type may be a void pointer. Create a pointer type based on
3116 the mem_ref instead. */
3117 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3118 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3119 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3127 /* Determines the expression by that USE is expressed from induction variable
3128 CAND at statement AT in LOOP. The computation is unshared. */
3131 get_computation_at (struct loop
*loop
,
3132 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3135 tree type
= get_use_type (use
);
3137 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3139 unshare_aff_combination (&aff
);
3140 return fold_convert (type
, aff_combination_to_tree (&aff
));
3143 /* Determines the expression by that USE is expressed from induction variable
3144 CAND in LOOP. The computation is unshared. */
3147 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3149 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3152 /* Adjust the cost COST for being in loop setup rather than loop body.
3153 If we're optimizing for space, the loop setup overhead is constant;
3154 if we're optimizing for speed, amortize it over the per-iteration cost. */
3156 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3160 else if (optimize_loop_for_speed_p (data
->current_loop
))
3161 return cost
/ avg_loop_niter (data
->current_loop
);
3166 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3167 validity for a memory reference accessing memory of mode MODE in
3168 address space AS. */
3172 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3175 #define MAX_RATIO 128
3176 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3177 static vec
<sbitmap
> valid_mult_list
;
3180 if (data_index
>= valid_mult_list
.length ())
3181 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3183 valid_mult
= valid_mult_list
[data_index
];
3186 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3187 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3188 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3192 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3193 bitmap_clear (valid_mult
);
3194 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3195 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3196 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3198 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3199 if (memory_address_addr_space_p (mode
, addr
, as
)
3200 || memory_address_addr_space_p (mode
, scaled
, as
))
3201 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3206 fprintf (dump_file
, " allowed multipliers:");
3207 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3208 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3209 fprintf (dump_file
, " %d", (int) i
);
3210 fprintf (dump_file
, "\n");
3211 fprintf (dump_file
, "\n");
3214 valid_mult_list
[data_index
] = valid_mult
;
3217 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3220 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3225 variable is omitted. Compute the cost for a memory reference that accesses
3226 a memory location of mode MEM_MODE in address space AS.
3228 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229 size of MEM_MODE / RATIO) is available. To make this determination, we
3230 look at the size of the increment to be made, which is given in CSTEP.
3231 CSTEP may be zero if the step is unknown.
3232 STMT_AFTER_INC is true iff the statement we're looking at is after the
3233 increment of the original biv.
3235 TODO -- there must be some better way. This all is quite crude. */
3239 AINC_PRE_INC
, /* Pre increment. */
3240 AINC_PRE_DEC
, /* Pre decrement. */
3241 AINC_POST_INC
, /* Post increment. */
3242 AINC_POST_DEC
, /* Post decrement. */
3243 AINC_NONE
/* Also the number of auto increment types. */
3246 typedef struct address_cost_data_s
3248 HOST_WIDE_INT min_offset
, max_offset
;
3249 unsigned costs
[2][2][2][2];
3250 unsigned ainc_costs
[AINC_NONE
];
3251 } *address_cost_data
;
3255 get_address_cost (bool symbol_present
, bool var_present
,
3256 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3257 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3258 addr_space_t as
, bool speed
,
3259 bool stmt_after_inc
, bool *may_autoinc
)
3261 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3262 static vec
<address_cost_data
> address_cost_data_list
;
3263 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3264 address_cost_data data
;
3265 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3266 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3267 unsigned cost
, acost
, complexity
;
3268 enum ainc_type autoinc_type
;
3269 bool offset_p
, ratio_p
, autoinc
;
3270 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3271 unsigned HOST_WIDE_INT mask
;
3274 if (data_index
>= address_cost_data_list
.length ())
3275 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3277 data
= address_cost_data_list
[data_index
];
3281 HOST_WIDE_INT rat
, off
= 0;
3282 int old_cse_not_expected
, width
;
3283 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3284 rtx seq
, addr
, base
;
3287 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3289 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3291 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3292 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3293 width
= HOST_BITS_PER_WIDE_INT
- 1;
3294 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3296 for (i
= width
; i
>= 0; i
--)
3298 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3299 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3300 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3303 data
->min_offset
= (i
== -1? 0 : off
);
3305 for (i
= width
; i
>= 0; i
--)
3307 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3308 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3309 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3314 data
->max_offset
= off
;
3316 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3318 fprintf (dump_file
, "get_address_cost:\n");
3319 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3320 GET_MODE_NAME (mem_mode
),
3322 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3323 GET_MODE_NAME (mem_mode
),
3328 for (i
= 2; i
<= MAX_RATIO
; i
++)
3329 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3335 /* Compute the cost of various addressing modes. */
3337 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3338 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3340 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3341 || USE_STORE_PRE_DECREMENT (mem_mode
))
3343 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3344 has_predec
[mem_mode
]
3345 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3347 if (has_predec
[mem_mode
])
3348 data
->ainc_costs
[AINC_PRE_DEC
]
3349 = address_cost (addr
, mem_mode
, as
, speed
);
3351 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3352 || USE_STORE_POST_DECREMENT (mem_mode
))
3354 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3355 has_postdec
[mem_mode
]
3356 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3358 if (has_postdec
[mem_mode
])
3359 data
->ainc_costs
[AINC_POST_DEC
]
3360 = address_cost (addr
, mem_mode
, as
, speed
);
3362 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3363 || USE_STORE_PRE_DECREMENT (mem_mode
))
3365 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3366 has_preinc
[mem_mode
]
3367 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3369 if (has_preinc
[mem_mode
])
3370 data
->ainc_costs
[AINC_PRE_INC
]
3371 = address_cost (addr
, mem_mode
, as
, speed
);
3373 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3374 || USE_STORE_POST_INCREMENT (mem_mode
))
3376 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3377 has_postinc
[mem_mode
]
3378 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3380 if (has_postinc
[mem_mode
])
3381 data
->ainc_costs
[AINC_POST_INC
]
3382 = address_cost (addr
, mem_mode
, as
, speed
);
3384 for (i
= 0; i
< 16; i
++)
3387 var_p
= (i
>> 1) & 1;
3388 off_p
= (i
>> 2) & 1;
3389 rat_p
= (i
>> 3) & 1;
3393 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3394 gen_int_mode (rat
, address_mode
));
3397 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3401 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3402 /* ??? We can run into trouble with some backends by presenting
3403 it with symbols which haven't been properly passed through
3404 targetm.encode_section_info. By setting the local bit, we
3405 enhance the probability of things working. */
3406 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3409 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3411 (PLUS
, address_mode
, base
,
3412 gen_int_mode (off
, address_mode
)));
3415 base
= gen_int_mode (off
, address_mode
);
3420 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3423 /* To avoid splitting addressing modes, pretend that no cse will
3425 old_cse_not_expected
= cse_not_expected
;
3426 cse_not_expected
= true;
3427 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3428 cse_not_expected
= old_cse_not_expected
;
3432 acost
= seq_cost (seq
, speed
);
3433 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3437 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3440 /* On some targets, it is quite expensive to load symbol to a register,
3441 which makes addresses that contain symbols look much more expensive.
3442 However, the symbol will have to be loaded in any case before the
3443 loop (and quite likely we have it in register already), so it does not
3444 make much sense to penalize them too heavily. So make some final
3445 tweaks for the SYMBOL_PRESENT modes:
3447 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3448 var is cheaper, use this mode with small penalty.
3449 If VAR_PRESENT is true, try whether the mode with
3450 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3451 if this is the case, use it. */
3452 add_c
= add_cost (speed
, address_mode
);
3453 for (i
= 0; i
< 8; i
++)
3456 off_p
= (i
>> 1) & 1;
3457 rat_p
= (i
>> 2) & 1;
3459 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3463 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3464 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3467 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3469 fprintf (dump_file
, "Address costs:\n");
3471 for (i
= 0; i
< 16; i
++)
3474 var_p
= (i
>> 1) & 1;
3475 off_p
= (i
>> 2) & 1;
3476 rat_p
= (i
>> 3) & 1;
3478 fprintf (dump_file
, " ");
3480 fprintf (dump_file
, "sym + ");
3482 fprintf (dump_file
, "var + ");
3484 fprintf (dump_file
, "cst + ");
3486 fprintf (dump_file
, "rat * ");
3488 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3489 fprintf (dump_file
, "index costs %d\n", acost
);
3491 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3492 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3493 fprintf (dump_file
, " May include autoinc/dec\n");
3494 fprintf (dump_file
, "\n");
3497 address_cost_data_list
[data_index
] = data
;
3500 bits
= GET_MODE_BITSIZE (address_mode
);
3501 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3503 if ((offset
>> (bits
- 1) & 1))
3508 autoinc_type
= AINC_NONE
;
3509 msize
= GET_MODE_SIZE (mem_mode
);
3510 autoinc_offset
= offset
;
3512 autoinc_offset
+= ratio
* cstep
;
3513 if (symbol_present
|| var_present
|| ratio
!= 1)
3517 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3519 autoinc_type
= AINC_POST_INC
;
3520 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3522 autoinc_type
= AINC_POST_DEC
;
3523 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3525 autoinc_type
= AINC_PRE_INC
;
3526 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3528 autoinc_type
= AINC_PRE_DEC
;
3530 if (autoinc_type
!= AINC_NONE
)
3535 offset_p
= (s_offset
!= 0
3536 && data
->min_offset
<= s_offset
3537 && s_offset
<= data
->max_offset
);
3538 ratio_p
= (ratio
!= 1
3539 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3541 if (ratio
!= 1 && !ratio_p
)
3542 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3544 if (s_offset
&& !offset_p
&& !symbol_present
)
3545 cost
+= add_cost (speed
, address_mode
);
3548 *may_autoinc
= autoinc
;
3550 acost
= data
->ainc_costs
[autoinc_type
];
3552 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3553 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3554 return new_cost (cost
+ acost
, complexity
);
3557 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3558 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3559 calculating the operands of EXPR. Returns true if successful, and returns
3560 the cost in COST. */
3563 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3564 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3567 tree op1
= TREE_OPERAND (expr
, 1);
3568 tree cst
= TREE_OPERAND (mult
, 1);
3569 tree multop
= TREE_OPERAND (mult
, 0);
3570 int m
= exact_log2 (int_cst_value (cst
));
3571 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3573 bool equal_p
= false;
3575 if (!(m
>= 0 && m
< maxm
))
3578 if (operand_equal_p (op1
, mult
, 0))
3581 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3582 ? shiftadd_cost (speed
, mode
, m
)
3584 ? shiftsub1_cost (speed
, mode
, m
)
3585 : shiftsub0_cost (speed
, mode
, m
)));
3586 res
= new_cost (sa_cost
, 0);
3587 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3589 STRIP_NOPS (multop
);
3590 if (!is_gimple_val (multop
))
3591 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3597 /* Estimates cost of forcing expression EXPR into a variable. */
3600 force_expr_to_var_cost (tree expr
, bool speed
)
3602 static bool costs_initialized
= false;
3603 static unsigned integer_cost
[2];
3604 static unsigned symbol_cost
[2];
3605 static unsigned address_cost
[2];
3607 comp_cost cost0
, cost1
, cost
;
3608 enum machine_mode mode
;
3610 if (!costs_initialized
)
3612 tree type
= build_pointer_type (integer_type_node
);
3617 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3618 TREE_STATIC (var
) = 1;
3619 x
= produce_memory_decl_rtl (var
, NULL
);
3620 SET_DECL_RTL (var
, x
);
3622 addr
= build1 (ADDR_EXPR
, type
, var
);
3625 for (i
= 0; i
< 2; i
++)
3627 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3630 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3633 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3634 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3636 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3637 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3638 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3639 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3640 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3641 fprintf (dump_file
, "\n");
3645 costs_initialized
= true;
3650 if (SSA_VAR_P (expr
))
3653 if (is_gimple_min_invariant (expr
))
3655 if (TREE_CODE (expr
) == INTEGER_CST
)
3656 return new_cost (integer_cost
[speed
], 0);
3658 if (TREE_CODE (expr
) == ADDR_EXPR
)
3660 tree obj
= TREE_OPERAND (expr
, 0);
3662 if (TREE_CODE (obj
) == VAR_DECL
3663 || TREE_CODE (obj
) == PARM_DECL
3664 || TREE_CODE (obj
) == RESULT_DECL
)
3665 return new_cost (symbol_cost
[speed
], 0);
3668 return new_cost (address_cost
[speed
], 0);
3671 switch (TREE_CODE (expr
))
3673 case POINTER_PLUS_EXPR
:
3677 op0
= TREE_OPERAND (expr
, 0);
3678 op1
= TREE_OPERAND (expr
, 1);
3685 op0
= TREE_OPERAND (expr
, 0);
3691 /* Just an arbitrary value, FIXME. */
3692 return new_cost (target_spill_cost
[speed
], 0);
3695 if (op0
== NULL_TREE
3696 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3699 cost0
= force_expr_to_var_cost (op0
, speed
);
3701 if (op1
== NULL_TREE
3702 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3705 cost1
= force_expr_to_var_cost (op1
, speed
);
3707 mode
= TYPE_MODE (TREE_TYPE (expr
));
3708 switch (TREE_CODE (expr
))
3710 case POINTER_PLUS_EXPR
:
3714 cost
= new_cost (add_cost (speed
, mode
), 0);
3715 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3717 tree mult
= NULL_TREE
;
3719 if (TREE_CODE (op1
) == MULT_EXPR
)
3721 else if (TREE_CODE (op0
) == MULT_EXPR
)
3724 if (mult
!= NULL_TREE
3725 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3726 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3734 tree inner_mode
, outer_mode
;
3735 outer_mode
= TREE_TYPE (expr
);
3736 inner_mode
= TREE_TYPE (op0
);
3737 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3738 TYPE_MODE (inner_mode
), speed
), 0);
3743 if (cst_and_fits_in_hwi (op0
))
3744 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3746 else if (cst_and_fits_in_hwi (op1
))
3747 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3750 return new_cost (target_spill_cost
[speed
], 0);
3757 cost
= add_costs (cost
, cost0
);
3758 cost
= add_costs (cost
, cost1
);
3760 /* Bound the cost by target_spill_cost. The parts of complicated
3761 computations often are either loop invariant or at least can
3762 be shared between several iv uses, so letting this grow without
3763 limits would not give reasonable results. */
3764 if (cost
.cost
> (int) target_spill_cost
[speed
])
3765 cost
.cost
= target_spill_cost
[speed
];
3770 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3771 invariants the computation depends on. */
3774 force_var_cost (struct ivopts_data
*data
,
3775 tree expr
, bitmap
*depends_on
)
3779 fd_ivopts_data
= data
;
3780 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3783 return force_expr_to_var_cost (expr
, data
->speed
);
3786 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3787 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3788 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3789 invariants the computation depends on. */
3792 split_address_cost (struct ivopts_data
*data
,
3793 tree addr
, bool *symbol_present
, bool *var_present
,
3794 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3797 HOST_WIDE_INT bitsize
;
3798 HOST_WIDE_INT bitpos
;
3800 enum machine_mode mode
;
3801 int unsignedp
, volatilep
;
3803 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3804 &unsignedp
, &volatilep
, false);
3807 || bitpos
% BITS_PER_UNIT
!= 0
3808 || TREE_CODE (core
) != VAR_DECL
)
3810 *symbol_present
= false;
3811 *var_present
= true;
3812 fd_ivopts_data
= data
;
3813 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3814 return new_cost (target_spill_cost
[data
->speed
], 0);
3817 *offset
+= bitpos
/ BITS_PER_UNIT
;
3818 if (TREE_STATIC (core
)
3819 || DECL_EXTERNAL (core
))
3821 *symbol_present
= true;
3822 *var_present
= false;
3826 *symbol_present
= false;
3827 *var_present
= true;
3831 /* Estimates cost of expressing difference of addresses E1 - E2 as
3832 var + symbol + offset. The value of offset is added to OFFSET,
3833 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3834 part is missing. DEPENDS_ON is a set of the invariants the computation
3838 ptr_difference_cost (struct ivopts_data
*data
,
3839 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3840 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3842 HOST_WIDE_INT diff
= 0;
3843 aff_tree aff_e1
, aff_e2
;
3846 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3848 if (ptr_difference_const (e1
, e2
, &diff
))
3851 *symbol_present
= false;
3852 *var_present
= false;
3856 if (integer_zerop (e2
))
3857 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3858 symbol_present
, var_present
, offset
, depends_on
);
3860 *symbol_present
= false;
3861 *var_present
= true;
3863 type
= signed_type_for (TREE_TYPE (e1
));
3864 tree_to_aff_combination (e1
, type
, &aff_e1
);
3865 tree_to_aff_combination (e2
, type
, &aff_e2
);
3866 aff_combination_scale (&aff_e2
, -1);
3867 aff_combination_add (&aff_e1
, &aff_e2
);
3869 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3872 /* Estimates cost of expressing difference E1 - E2 as
3873 var + symbol + offset. The value of offset is added to OFFSET,
3874 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3875 part is missing. DEPENDS_ON is a set of the invariants the computation
3879 difference_cost (struct ivopts_data
*data
,
3880 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3881 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3883 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3884 unsigned HOST_WIDE_INT off1
, off2
;
3885 aff_tree aff_e1
, aff_e2
;
3888 e1
= strip_offset (e1
, &off1
);
3889 e2
= strip_offset (e2
, &off2
);
3890 *offset
+= off1
- off2
;
3895 if (TREE_CODE (e1
) == ADDR_EXPR
)
3896 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3897 offset
, depends_on
);
3898 *symbol_present
= false;
3900 if (operand_equal_p (e1
, e2
, 0))
3902 *var_present
= false;
3906 *var_present
= true;
3908 if (integer_zerop (e2
))
3909 return force_var_cost (data
, e1
, depends_on
);
3911 if (integer_zerop (e1
))
3913 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3914 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3918 type
= signed_type_for (TREE_TYPE (e1
));
3919 tree_to_aff_combination (e1
, type
, &aff_e1
);
3920 tree_to_aff_combination (e2
, type
, &aff_e2
);
3921 aff_combination_scale (&aff_e2
, -1);
3922 aff_combination_add (&aff_e1
, &aff_e2
);
3924 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3927 /* Returns true if AFF1 and AFF2 are identical. */
3930 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3934 if (aff1
->n
!= aff2
->n
)
3937 for (i
= 0; i
< aff1
->n
; i
++)
3939 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3942 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3948 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3951 get_expr_id (struct ivopts_data
*data
, tree expr
)
3953 struct iv_inv_expr_ent ent
;
3954 struct iv_inv_expr_ent
**slot
;
3957 ent
.hash
= iterative_hash_expr (expr
, 0);
3958 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
3962 *slot
= XNEW (struct iv_inv_expr_ent
);
3963 (*slot
)->expr
= expr
;
3964 (*slot
)->hash
= ent
.hash
;
3965 (*slot
)->id
= data
->inv_expr_id
++;
3969 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3970 requires a new compiler generated temporary. Returns -1 otherwise.
3971 ADDRESS_P is a flag indicating if the expression is for address
3975 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3976 tree cbase
, HOST_WIDE_INT ratio
,
3979 aff_tree ubase_aff
, cbase_aff
;
3987 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3988 && (TREE_CODE (cbase
) == INTEGER_CST
))
3991 /* Strips the constant part. */
3992 if (TREE_CODE (ubase
) == PLUS_EXPR
3993 || TREE_CODE (ubase
) == MINUS_EXPR
3994 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3996 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3997 ubase
= TREE_OPERAND (ubase
, 0);
4000 /* Strips the constant part. */
4001 if (TREE_CODE (cbase
) == PLUS_EXPR
4002 || TREE_CODE (cbase
) == MINUS_EXPR
4003 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4005 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4006 cbase
= TREE_OPERAND (cbase
, 0);
4011 if (((TREE_CODE (ubase
) == SSA_NAME
)
4012 || (TREE_CODE (ubase
) == ADDR_EXPR
4013 && is_gimple_min_invariant (ubase
)))
4014 && (TREE_CODE (cbase
) == INTEGER_CST
))
4017 if (((TREE_CODE (cbase
) == SSA_NAME
)
4018 || (TREE_CODE (cbase
) == ADDR_EXPR
4019 && is_gimple_min_invariant (cbase
)))
4020 && (TREE_CODE (ubase
) == INTEGER_CST
))
4026 if (operand_equal_p (ubase
, cbase
, 0))
4029 if (TREE_CODE (ubase
) == ADDR_EXPR
4030 && TREE_CODE (cbase
) == ADDR_EXPR
)
4034 usym
= TREE_OPERAND (ubase
, 0);
4035 csym
= TREE_OPERAND (cbase
, 0);
4036 if (TREE_CODE (usym
) == ARRAY_REF
)
4038 tree ind
= TREE_OPERAND (usym
, 1);
4039 if (TREE_CODE (ind
) == INTEGER_CST
4040 && tree_fits_shwi_p (ind
)
4041 && tree_to_shwi (ind
) == 0)
4042 usym
= TREE_OPERAND (usym
, 0);
4044 if (TREE_CODE (csym
) == ARRAY_REF
)
4046 tree ind
= TREE_OPERAND (csym
, 1);
4047 if (TREE_CODE (ind
) == INTEGER_CST
4048 && tree_fits_shwi_p (ind
)
4049 && tree_to_shwi (ind
) == 0)
4050 csym
= TREE_OPERAND (csym
, 0);
4052 if (operand_equal_p (usym
, csym
, 0))
4055 /* Now do more complex comparison */
4056 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4057 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4058 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4062 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4063 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4065 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4066 aff_combination_add (&ubase_aff
, &cbase_aff
);
4067 expr
= aff_combination_to_tree (&ubase_aff
);
4068 return get_expr_id (data
, expr
);
4073 /* Determines the cost of the computation by that USE is expressed
4074 from induction variable CAND. If ADDRESS_P is true, we just need
4075 to create an address from it, otherwise we want to get it into
4076 register. A set of invariants we depend on is stored in
4077 DEPENDS_ON. AT is the statement at that the value is computed.
4078 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4079 addressing is likely. */
4082 get_computation_cost_at (struct ivopts_data
*data
,
4083 struct iv_use
*use
, struct iv_cand
*cand
,
4084 bool address_p
, bitmap
*depends_on
, gimple at
,
4088 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4090 tree utype
= TREE_TYPE (ubase
), ctype
;
4091 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4092 HOST_WIDE_INT ratio
, aratio
;
4093 bool var_present
, symbol_present
, stmt_is_after_inc
;
4096 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4097 enum machine_mode mem_mode
= (address_p
4098 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4103 /* Only consider real candidates. */
4105 return infinite_cost
;
4107 cbase
= cand
->iv
->base
;
4108 cstep
= cand
->iv
->step
;
4109 ctype
= TREE_TYPE (cbase
);
4111 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4113 /* We do not have a precision to express the values of use. */
4114 return infinite_cost
;
4118 || (use
->iv
->base_object
4119 && cand
->iv
->base_object
4120 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4121 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4123 /* Do not try to express address of an object with computation based
4124 on address of a different object. This may cause problems in rtl
4125 level alias analysis (that does not expect this to be happening,
4126 as this is illegal in C), and would be unlikely to be useful
4128 if (use
->iv
->base_object
4129 && cand
->iv
->base_object
4130 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4131 return infinite_cost
;
4134 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4136 /* TODO -- add direct handling of this case. */
4140 /* CSTEPI is removed from the offset in case statement is after the
4141 increment. If the step is not constant, we use zero instead.
4142 This is a bit imprecise (there is the extra addition), but
4143 redundancy elimination is likely to transform the code so that
4144 it uses value of the variable before increment anyway,
4145 so it is not that much unrealistic. */
4146 if (cst_and_fits_in_hwi (cstep
))
4147 cstepi
= int_cst_value (cstep
);
4151 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4152 return infinite_cost
;
4154 if (wi::fits_shwi_p (rat
))
4155 ratio
= rat
.to_shwi ();
4157 return infinite_cost
;
4160 ctype
= TREE_TYPE (cbase
);
4162 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4164 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4165 or ratio == 1, it is better to handle this like
4167 ubase - ratio * cbase + ratio * var
4169 (also holds in the case ratio == -1, TODO. */
4171 if (cst_and_fits_in_hwi (cbase
))
4173 offset
= - ratio
* int_cst_value (cbase
);
4174 cost
= difference_cost (data
,
4175 ubase
, build_int_cst (utype
, 0),
4176 &symbol_present
, &var_present
, &offset
,
4178 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4180 else if (ratio
== 1)
4182 tree real_cbase
= cbase
;
4184 /* Check to see if any adjustment is needed. */
4185 if (cstepi
== 0 && stmt_is_after_inc
)
4187 aff_tree real_cbase_aff
;
4190 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4192 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4194 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4195 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4198 cost
= difference_cost (data
,
4200 &symbol_present
, &var_present
, &offset
,
4202 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4205 && !POINTER_TYPE_P (ctype
)
4206 && multiplier_allowed_in_address_p
4208 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4211 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4212 cost
= difference_cost (data
,
4214 &symbol_present
, &var_present
, &offset
,
4216 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4220 cost
= force_var_cost (data
, cbase
, depends_on
);
4221 cost
= add_costs (cost
,
4222 difference_cost (data
,
4223 ubase
, build_int_cst (utype
, 0),
4224 &symbol_present
, &var_present
,
4225 &offset
, depends_on
));
4226 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4227 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4233 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4234 /* Clear depends on. */
4235 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4236 bitmap_clear (*depends_on
);
4239 /* If we are after the increment, the value of the candidate is higher by
4241 if (stmt_is_after_inc
)
4242 offset
-= ratio
* cstepi
;
4244 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4245 (symbol/var1/const parts may be omitted). If we are looking for an
4246 address, find the cost of addressing this. */
4248 return add_costs (cost
,
4249 get_address_cost (symbol_present
, var_present
,
4250 offset
, ratio
, cstepi
,
4252 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4253 speed
, stmt_is_after_inc
,
4256 /* Otherwise estimate the costs for computing the expression. */
4257 if (!symbol_present
&& !var_present
&& !offset
)
4260 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4264 /* Symbol + offset should be compile-time computable so consider that they
4265 are added once to the variable, if present. */
4266 if (var_present
&& (symbol_present
|| offset
))
4267 cost
.cost
+= adjust_setup_cost (data
,
4268 add_cost (speed
, TYPE_MODE (ctype
)));
4270 /* Having offset does not affect runtime cost in case it is added to
4271 symbol, but it increases complexity. */
4275 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4277 aratio
= ratio
> 0 ? ratio
: -ratio
;
4279 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4284 *can_autoinc
= false;
4287 /* Just get the expression, expand it and measure the cost. */
4288 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4291 return infinite_cost
;
4294 comp
= build_simple_mem_ref (comp
);
4296 return new_cost (computation_cost (comp
, speed
), 0);
4300 /* Determines the cost of the computation by that USE is expressed
4301 from induction variable CAND. If ADDRESS_P is true, we just need
4302 to create an address from it, otherwise we want to get it into
4303 register. A set of invariants we depend on is stored in
4304 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4305 autoinc addressing is likely. */
4308 get_computation_cost (struct ivopts_data
*data
,
4309 struct iv_use
*use
, struct iv_cand
*cand
,
4310 bool address_p
, bitmap
*depends_on
,
4311 bool *can_autoinc
, int *inv_expr_id
)
4313 return get_computation_cost_at (data
,
4314 use
, cand
, address_p
, depends_on
, use
->stmt
,
4315 can_autoinc
, inv_expr_id
);
4318 /* Determines cost of basing replacement of USE on CAND in a generic
4322 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4323 struct iv_use
*use
, struct iv_cand
*cand
)
4327 int inv_expr_id
= -1;
4329 /* The simple case first -- if we need to express value of the preserved
4330 original biv, the cost is 0. This also prevents us from counting the
4331 cost of increment twice -- once at this use and once in the cost of
4333 if (cand
->pos
== IP_ORIGINAL
4334 && cand
->incremented_at
== use
->stmt
)
4336 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4341 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4342 NULL
, &inv_expr_id
);
4344 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4347 return !infinite_cost_p (cost
);
4350 /* Determines cost of basing replacement of USE on CAND in an address. */
4353 determine_use_iv_cost_address (struct ivopts_data
*data
,
4354 struct iv_use
*use
, struct iv_cand
*cand
)
4358 int inv_expr_id
= -1;
4359 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4360 &can_autoinc
, &inv_expr_id
);
4362 if (cand
->ainc_use
== use
)
4365 cost
.cost
-= cand
->cost_step
;
4366 /* If we generated the candidate solely for exploiting autoincrement
4367 opportunities, and it turns out it can't be used, set the cost to
4368 infinity to make sure we ignore it. */
4369 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4370 cost
= infinite_cost
;
4372 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4375 return !infinite_cost_p (cost
);
4378 /* Computes value of candidate CAND at position AT in iteration NITER, and
4379 stores it to VAL. */
4382 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4385 aff_tree step
, delta
, nit
;
4386 struct iv
*iv
= cand
->iv
;
4387 tree type
= TREE_TYPE (iv
->base
);
4388 tree steptype
= type
;
4389 if (POINTER_TYPE_P (type
))
4390 steptype
= sizetype
;
4391 steptype
= unsigned_type_for (type
);
4393 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4394 aff_combination_convert (&step
, steptype
);
4395 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4396 aff_combination_convert (&nit
, steptype
);
4397 aff_combination_mult (&nit
, &step
, &delta
);
4398 if (stmt_after_increment (loop
, cand
, at
))
4399 aff_combination_add (&delta
, &step
);
4401 tree_to_aff_combination (iv
->base
, type
, val
);
4402 if (!POINTER_TYPE_P (type
))
4403 aff_combination_convert (val
, steptype
);
4404 aff_combination_add (val
, &delta
);
4407 /* Returns period of induction variable iv. */
4410 iv_period (struct iv
*iv
)
4412 tree step
= iv
->step
, period
, type
;
4415 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4417 type
= unsigned_type_for (TREE_TYPE (step
));
4418 /* Period of the iv is lcm (step, type_range)/step -1,
4419 i.e., N*type_range/step - 1. Since type range is power
4420 of two, N == (step >> num_of_ending_zeros_binary (step),
4421 so the final result is
4423 (type_range >> num_of_ending_zeros_binary (step)) - 1
4426 pow2div
= num_ending_zeros (step
);
4428 period
= build_low_bits_mask (type
,
4429 (TYPE_PRECISION (type
)
4430 - tree_to_uhwi (pow2div
)));
4435 /* Returns the comparison operator used when eliminating the iv USE. */
4437 static enum tree_code
4438 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4440 struct loop
*loop
= data
->current_loop
;
4444 ex_bb
= gimple_bb (use
->stmt
);
4445 exit
= EDGE_SUCC (ex_bb
, 0);
4446 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4447 exit
= EDGE_SUCC (ex_bb
, 1);
4449 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4453 strip_wrap_conserving_type_conversions (tree exp
)
4455 while (tree_ssa_useless_type_conversion (exp
)
4456 && (nowrap_type_p (TREE_TYPE (exp
))
4457 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4458 exp
= TREE_OPERAND (exp
, 0);
4462 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4463 check for an exact match. */
4466 expr_equal_p (tree e
, tree what
)
4469 enum tree_code code
;
4471 e
= strip_wrap_conserving_type_conversions (e
);
4472 what
= strip_wrap_conserving_type_conversions (what
);
4474 code
= TREE_CODE (what
);
4475 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4478 if (operand_equal_p (e
, what
, 0))
4481 if (TREE_CODE (e
) != SSA_NAME
)
4484 stmt
= SSA_NAME_DEF_STMT (e
);
4485 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4486 || gimple_assign_rhs_code (stmt
) != code
)
4489 switch (get_gimple_rhs_class (code
))
4491 case GIMPLE_BINARY_RHS
:
4492 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4496 case GIMPLE_UNARY_RHS
:
4497 case GIMPLE_SINGLE_RHS
:
4498 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4504 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4505 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4506 calculation is performed in non-wrapping type.
4508 TODO: More generally, we could test for the situation that
4509 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4510 This would require knowing the sign of OFFSET.
4512 Also, we only look for the first addition in the computation of BASE.
4513 More complex analysis would be better, but introducing it just for
4514 this optimization seems like an overkill. */
4517 difference_cannot_overflow_p (tree base
, tree offset
)
4519 enum tree_code code
;
4522 if (!nowrap_type_p (TREE_TYPE (base
)))
4525 base
= expand_simple_operations (base
);
4527 if (TREE_CODE (base
) == SSA_NAME
)
4529 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4531 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4534 code
= gimple_assign_rhs_code (stmt
);
4535 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4538 e1
= gimple_assign_rhs1 (stmt
);
4539 e2
= gimple_assign_rhs2 (stmt
);
4543 code
= TREE_CODE (base
);
4544 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4546 e1
= TREE_OPERAND (base
, 0);
4547 e2
= TREE_OPERAND (base
, 1);
4550 /* TODO: deeper inspection may be necessary to prove the equality. */
4554 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4555 case POINTER_PLUS_EXPR
:
4556 return expr_equal_p (e2
, offset
);
4563 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4564 comparison with CAND. NITER describes the number of iterations of
4565 the loops. If successful, the comparison in COMP_P is altered accordingly.
4567 We aim to handle the following situation:
4583 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4584 We aim to optimize this to
4592 while (p < p_0 - a + b);
4594 This preserves the correctness, since the pointer arithmetics does not
4595 overflow. More precisely:
4597 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4598 overflow in computing it or the values of p.
4599 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4600 overflow. To prove this, we use the fact that p_0 = base + a. */
4603 iv_elimination_compare_lt (struct ivopts_data
*data
,
4604 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4605 struct tree_niter_desc
*niter
)
4607 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4608 struct aff_tree nit
, tmpa
, tmpb
;
4609 enum tree_code comp
;
4612 /* We need to know that the candidate induction variable does not overflow.
4613 While more complex analysis may be used to prove this, for now just
4614 check that the variable appears in the original program and that it
4615 is computed in a type that guarantees no overflows. */
4616 cand_type
= TREE_TYPE (cand
->iv
->base
);
4617 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4620 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4621 the calculation of the BOUND could overflow, making the comparison
4623 if (!data
->loop_single_exit_p
)
4626 /* We need to be able to decide whether candidate is increasing or decreasing
4627 in order to choose the right comparison operator. */
4628 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4630 step
= int_cst_value (cand
->iv
->step
);
4632 /* Check that the number of iterations matches the expected pattern:
4633 a + 1 > b ? 0 : b - a - 1. */
4634 mbz
= niter
->may_be_zero
;
4635 if (TREE_CODE (mbz
) == GT_EXPR
)
4637 /* Handle a + 1 > b. */
4638 tree op0
= TREE_OPERAND (mbz
, 0);
4639 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4641 a
= TREE_OPERAND (op0
, 0);
4642 b
= TREE_OPERAND (mbz
, 1);
4647 else if (TREE_CODE (mbz
) == LT_EXPR
)
4649 tree op1
= TREE_OPERAND (mbz
, 1);
4651 /* Handle b < a + 1. */
4652 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4654 a
= TREE_OPERAND (op1
, 0);
4655 b
= TREE_OPERAND (mbz
, 0);
4663 /* Expected number of iterations is B - A - 1. Check that it matches
4664 the actual number, i.e., that B - A - NITER = 1. */
4665 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4666 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4667 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4668 aff_combination_scale (&nit
, -1);
4669 aff_combination_scale (&tmpa
, -1);
4670 aff_combination_add (&tmpb
, &tmpa
);
4671 aff_combination_add (&tmpb
, &nit
);
4672 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4675 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4677 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4679 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4680 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4683 /* Determine the new comparison operator. */
4684 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4685 if (*comp_p
== NE_EXPR
)
4687 else if (*comp_p
== EQ_EXPR
)
4688 *comp_p
= invert_tree_comparison (comp
, false);
4695 /* Check whether it is possible to express the condition in USE by comparison
4696 of candidate CAND. If so, store the value compared with to BOUND, and the
4697 comparison operator to COMP. */
4700 may_eliminate_iv (struct ivopts_data
*data
,
4701 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4702 enum tree_code
*comp
)
4707 struct loop
*loop
= data
->current_loop
;
4709 struct tree_niter_desc
*desc
= NULL
;
4711 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4714 /* For now works only for exits that dominate the loop latch.
4715 TODO: extend to other conditions inside loop body. */
4716 ex_bb
= gimple_bb (use
->stmt
);
4717 if (use
->stmt
!= last_stmt (ex_bb
)
4718 || gimple_code (use
->stmt
) != GIMPLE_COND
4719 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4722 exit
= EDGE_SUCC (ex_bb
, 0);
4723 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4724 exit
= EDGE_SUCC (ex_bb
, 1);
4725 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4728 desc
= niter_for_exit (data
, exit
);
4732 /* Determine whether we can use the variable to test the exit condition.
4733 This is the case iff the period of the induction variable is greater
4734 than the number of iterations for which the exit condition is true. */
4735 period
= iv_period (cand
->iv
);
4737 /* If the number of iterations is constant, compare against it directly. */
4738 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4740 /* See cand_value_at. */
4741 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4743 if (!tree_int_cst_lt (desc
->niter
, period
))
4748 if (tree_int_cst_lt (period
, desc
->niter
))
4753 /* If not, and if this is the only possible exit of the loop, see whether
4754 we can get a conservative estimate on the number of iterations of the
4755 entire loop and compare against that instead. */
4758 widest_int period_value
, max_niter
;
4760 max_niter
= desc
->max
;
4761 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4763 period_value
= wi::to_widest (period
);
4764 if (wi::gtu_p (max_niter
, period_value
))
4766 /* See if we can take advantage of inferred loop bound information. */
4767 if (data
->loop_single_exit_p
)
4769 if (!max_loop_iterations (loop
, &max_niter
))
4771 /* The loop bound is already adjusted by adding 1. */
4772 if (wi::gtu_p (max_niter
, period_value
))
4780 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4782 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4783 aff_combination_to_tree (&bnd
));
4784 *comp
= iv_elimination_compare (data
, use
);
4786 /* It is unlikely that computing the number of iterations using division
4787 would be more profitable than keeping the original induction variable. */
4788 if (expression_expensive_p (*bound
))
4791 /* Sometimes, it is possible to handle the situation that the number of
4792 iterations may be zero unless additional assumtions by using <
4793 instead of != in the exit condition.
4795 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4796 base the exit condition on it. However, that is often too
4798 if (!integer_zerop (desc
->may_be_zero
))
4799 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4804 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4805 be copied, if is is used in the loop body and DATA->body_includes_call. */
4808 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4810 tree sbound
= bound
;
4811 STRIP_NOPS (sbound
);
4813 if (TREE_CODE (sbound
) == SSA_NAME
4814 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4815 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4816 && data
->body_includes_call
)
4817 return COSTS_N_INSNS (1);
4822 /* Determines cost of basing replacement of USE on CAND in a condition. */
4825 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4826 struct iv_use
*use
, struct iv_cand
*cand
)
4828 tree bound
= NULL_TREE
;
4830 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4831 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4833 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4834 tree
*control_var
, *bound_cst
;
4835 enum tree_code comp
= ERROR_MARK
;
4837 /* Only consider real candidates. */
4840 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4845 /* Try iv elimination. */
4846 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4848 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4849 if (elim_cost
.cost
== 0)
4850 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4851 else if (TREE_CODE (bound
) == INTEGER_CST
)
4853 /* If we replace a loop condition 'i < n' with 'p < base + n',
4854 depends_on_elim will have 'base' and 'n' set, which implies
4855 that both 'base' and 'n' will be live during the loop. More likely,
4856 'base + n' will be loop invariant, resulting in only one live value
4857 during the loop. So in that case we clear depends_on_elim and set
4858 elim_inv_expr_id instead. */
4859 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4861 elim_inv_expr_id
= get_expr_id (data
, bound
);
4862 bitmap_clear (depends_on_elim
);
4864 /* The bound is a loop invariant, so it will be only computed
4866 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4869 elim_cost
= infinite_cost
;
4871 /* Try expressing the original giv. If it is compared with an invariant,
4872 note that we cannot get rid of it. */
4873 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4877 /* When the condition is a comparison of the candidate IV against
4878 zero, prefer this IV.
4880 TODO: The constant that we're subtracting from the cost should
4881 be target-dependent. This information should be added to the
4882 target costs for each backend. */
4883 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4884 && integer_zerop (*bound_cst
)
4885 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4886 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4887 elim_cost
.cost
-= 1;
4889 express_cost
= get_computation_cost (data
, use
, cand
, false,
4890 &depends_on_express
, NULL
,
4891 &express_inv_expr_id
);
4892 fd_ivopts_data
= data
;
4893 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4895 /* Count the cost of the original bound as well. */
4896 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4897 if (bound_cost
.cost
== 0)
4898 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4899 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4900 bound_cost
.cost
= 0;
4901 express_cost
.cost
+= bound_cost
.cost
;
4903 /* Choose the better approach, preferring the eliminated IV. */
4904 if (compare_costs (elim_cost
, express_cost
) <= 0)
4907 depends_on
= depends_on_elim
;
4908 depends_on_elim
= NULL
;
4909 inv_expr_id
= elim_inv_expr_id
;
4913 cost
= express_cost
;
4914 depends_on
= depends_on_express
;
4915 depends_on_express
= NULL
;
4918 inv_expr_id
= express_inv_expr_id
;
4921 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4923 if (depends_on_elim
)
4924 BITMAP_FREE (depends_on_elim
);
4925 if (depends_on_express
)
4926 BITMAP_FREE (depends_on_express
);
4928 return !infinite_cost_p (cost
);
4931 /* Determines cost of basing replacement of USE on CAND. Returns false
4932 if USE cannot be based on CAND. */
4935 determine_use_iv_cost (struct ivopts_data
*data
,
4936 struct iv_use
*use
, struct iv_cand
*cand
)
4940 case USE_NONLINEAR_EXPR
:
4941 return determine_use_iv_cost_generic (data
, use
, cand
);
4944 return determine_use_iv_cost_address (data
, use
, cand
);
4947 return determine_use_iv_cost_condition (data
, use
, cand
);
4954 /* Return true if get_computation_cost indicates that autoincrement is
4955 a possibility for the pair of USE and CAND, false otherwise. */
4958 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4959 struct iv_cand
*cand
)
4965 if (use
->type
!= USE_ADDRESS
)
4968 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4969 &can_autoinc
, NULL
);
4971 BITMAP_FREE (depends_on
);
4973 return !infinite_cost_p (cost
) && can_autoinc
;
4976 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4977 use that allows autoincrement, and set their AINC_USE if possible. */
4980 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4984 for (i
= 0; i
< n_iv_cands (data
); i
++)
4986 struct iv_cand
*cand
= iv_cand (data
, i
);
4987 struct iv_use
*closest_before
= NULL
;
4988 struct iv_use
*closest_after
= NULL
;
4989 if (cand
->pos
!= IP_ORIGINAL
)
4992 for (j
= 0; j
< n_iv_uses (data
); j
++)
4994 struct iv_use
*use
= iv_use (data
, j
);
4995 unsigned uid
= gimple_uid (use
->stmt
);
4997 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5000 if (uid
< gimple_uid (cand
->incremented_at
)
5001 && (closest_before
== NULL
5002 || uid
> gimple_uid (closest_before
->stmt
)))
5003 closest_before
= use
;
5005 if (uid
> gimple_uid (cand
->incremented_at
)
5006 && (closest_after
== NULL
5007 || uid
< gimple_uid (closest_after
->stmt
)))
5008 closest_after
= use
;
5011 if (closest_before
!= NULL
5012 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5013 cand
->ainc_use
= closest_before
;
5014 else if (closest_after
!= NULL
5015 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5016 cand
->ainc_use
= closest_after
;
5020 /* Finds the candidates for the induction variables. */
5023 find_iv_candidates (struct ivopts_data
*data
)
5025 /* Add commonly used ivs. */
5026 add_standard_iv_candidates (data
);
5028 /* Add old induction variables. */
5029 add_old_ivs_candidates (data
);
5031 /* Add induction variables derived from uses. */
5032 add_derived_ivs_candidates (data
);
5034 set_autoinc_for_original_candidates (data
);
5036 /* Record the important candidates. */
5037 record_important_candidates (data
);
5040 /* Determines costs of basing the use of the iv on an iv candidate. */
5043 determine_use_iv_costs (struct ivopts_data
*data
)
5047 struct iv_cand
*cand
;
5048 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5050 alloc_use_cost_map (data
);
5052 for (i
= 0; i
< n_iv_uses (data
); i
++)
5054 use
= iv_use (data
, i
);
5056 if (data
->consider_all_candidates
)
5058 for (j
= 0; j
< n_iv_cands (data
); j
++)
5060 cand
= iv_cand (data
, j
);
5061 determine_use_iv_cost (data
, use
, cand
);
5068 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5070 cand
= iv_cand (data
, j
);
5071 if (!determine_use_iv_cost (data
, use
, cand
))
5072 bitmap_set_bit (to_clear
, j
);
5075 /* Remove the candidates for that the cost is infinite from
5076 the list of related candidates. */
5077 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5078 bitmap_clear (to_clear
);
5082 BITMAP_FREE (to_clear
);
5084 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5086 fprintf (dump_file
, "Use-candidate costs:\n");
5088 for (i
= 0; i
< n_iv_uses (data
); i
++)
5090 use
= iv_use (data
, i
);
5092 fprintf (dump_file
, "Use %d:\n", i
);
5093 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5094 for (j
= 0; j
< use
->n_map_members
; j
++)
5096 if (!use
->cost_map
[j
].cand
5097 || infinite_cost_p (use
->cost_map
[j
].cost
))
5100 fprintf (dump_file
, " %d\t%d\t%d\t",
5101 use
->cost_map
[j
].cand
->id
,
5102 use
->cost_map
[j
].cost
.cost
,
5103 use
->cost_map
[j
].cost
.complexity
);
5104 if (use
->cost_map
[j
].depends_on
)
5105 bitmap_print (dump_file
,
5106 use
->cost_map
[j
].depends_on
, "","");
5107 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5108 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5109 fprintf (dump_file
, "\n");
5112 fprintf (dump_file
, "\n");
5114 fprintf (dump_file
, "\n");
5118 /* Determines cost of the candidate CAND. */
5121 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5123 comp_cost cost_base
;
5124 unsigned cost
, cost_step
;
5133 /* There are two costs associated with the candidate -- its increment
5134 and its initialization. The second is almost negligible for any loop
5135 that rolls enough, so we take it just very little into account. */
5137 base
= cand
->iv
->base
;
5138 cost_base
= force_var_cost (data
, base
, NULL
);
5139 /* It will be exceptional that the iv register happens to be initialized with
5140 the proper value at no cost. In general, there will at least be a regcopy
5142 if (cost_base
.cost
== 0)
5143 cost_base
.cost
= COSTS_N_INSNS (1);
5144 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5146 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5148 /* Prefer the original ivs unless we may gain something by replacing it.
5149 The reason is to make debugging simpler; so this is not relevant for
5150 artificial ivs created by other optimization passes. */
5151 if (cand
->pos
!= IP_ORIGINAL
5152 || !SSA_NAME_VAR (cand
->var_before
)
5153 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5156 /* Prefer not to insert statements into latch unless there are some
5157 already (so that we do not create unnecessary jumps). */
5158 if (cand
->pos
== IP_END
5159 && empty_block_p (ip_end_pos (data
->current_loop
)))
5163 cand
->cost_step
= cost_step
;
5166 /* Determines costs of computation of the candidates. */
5169 determine_iv_costs (struct ivopts_data
*data
)
5173 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5175 fprintf (dump_file
, "Candidate costs:\n");
5176 fprintf (dump_file
, " cand\tcost\n");
5179 for (i
= 0; i
< n_iv_cands (data
); i
++)
5181 struct iv_cand
*cand
= iv_cand (data
, i
);
5183 determine_iv_cost (data
, cand
);
5185 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5186 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5189 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5190 fprintf (dump_file
, "\n");
5193 /* Calculates cost for having SIZE induction variables. */
5196 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5198 /* We add size to the cost, so that we prefer eliminating ivs
5200 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5201 data
->body_includes_call
);
5204 /* For each size of the induction variable set determine the penalty. */
5207 determine_set_costs (struct ivopts_data
*data
)
5211 gimple_stmt_iterator psi
;
5213 struct loop
*loop
= data
->current_loop
;
5216 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5218 fprintf (dump_file
, "Global costs:\n");
5219 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5220 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5221 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5222 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5226 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5228 phi
= gsi_stmt (psi
);
5229 op
= PHI_RESULT (phi
);
5231 if (virtual_operand_p (op
))
5234 if (get_iv (data
, op
))
5240 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5242 struct version_info
*info
= ver_info (data
, j
);
5244 if (info
->inv_id
&& info
->has_nonlin_use
)
5248 data
->regs_used
= n
;
5249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5250 fprintf (dump_file
, " regs_used %d\n", n
);
5252 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5254 fprintf (dump_file
, " cost for size:\n");
5255 fprintf (dump_file
, " ivs\tcost\n");
5256 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5257 fprintf (dump_file
, " %d\t%d\n", j
,
5258 ivopts_global_cost_for_size (data
, j
));
5259 fprintf (dump_file
, "\n");
5263 /* Returns true if A is a cheaper cost pair than B. */
5266 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5276 cmp
= compare_costs (a
->cost
, b
->cost
);
5283 /* In case the costs are the same, prefer the cheaper candidate. */
5284 if (a
->cand
->cost
< b
->cand
->cost
)
5291 /* Returns candidate by that USE is expressed in IVS. */
5293 static struct cost_pair
*
5294 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5296 return ivs
->cand_for_use
[use
->id
];
5299 /* Computes the cost field of IVS structure. */
5302 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5304 comp_cost cost
= ivs
->cand_use_cost
;
5306 cost
.cost
+= ivs
->cand_cost
;
5308 cost
.cost
+= ivopts_global_cost_for_size (data
,
5309 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5314 /* Remove invariants in set INVS to set IVS. */
5317 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5325 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5327 ivs
->n_invariant_uses
[iid
]--;
5328 if (ivs
->n_invariant_uses
[iid
] == 0)
5333 /* Set USE not to be expressed by any candidate in IVS. */
5336 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5339 unsigned uid
= use
->id
, cid
;
5340 struct cost_pair
*cp
;
5342 cp
= ivs
->cand_for_use
[uid
];
5348 ivs
->cand_for_use
[uid
] = NULL
;
5349 ivs
->n_cand_uses
[cid
]--;
5351 if (ivs
->n_cand_uses
[cid
] == 0)
5353 bitmap_clear_bit (ivs
->cands
, cid
);
5354 /* Do not count the pseudocandidates. */
5358 ivs
->cand_cost
-= cp
->cand
->cost
;
5360 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5363 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5365 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5367 if (cp
->inv_expr_id
!= -1)
5369 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5370 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5371 ivs
->num_used_inv_expr
--;
5373 iv_ca_recount_cost (data
, ivs
);
5376 /* Add invariants in set INVS to set IVS. */
5379 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5387 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5389 ivs
->n_invariant_uses
[iid
]++;
5390 if (ivs
->n_invariant_uses
[iid
] == 1)
5395 /* Set cost pair for USE in set IVS to CP. */
5398 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5399 struct iv_use
*use
, struct cost_pair
*cp
)
5401 unsigned uid
= use
->id
, cid
;
5403 if (ivs
->cand_for_use
[uid
] == cp
)
5406 if (ivs
->cand_for_use
[uid
])
5407 iv_ca_set_no_cp (data
, ivs
, use
);
5414 ivs
->cand_for_use
[uid
] = cp
;
5415 ivs
->n_cand_uses
[cid
]++;
5416 if (ivs
->n_cand_uses
[cid
] == 1)
5418 bitmap_set_bit (ivs
->cands
, cid
);
5419 /* Do not count the pseudocandidates. */
5423 ivs
->cand_cost
+= cp
->cand
->cost
;
5425 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5428 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5429 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5431 if (cp
->inv_expr_id
!= -1)
5433 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5434 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5435 ivs
->num_used_inv_expr
++;
5437 iv_ca_recount_cost (data
, ivs
);
5441 /* Extend set IVS by expressing USE by some of the candidates in it
5442 if possible. All important candidates will be considered
5443 if IMPORTANT_CANDIDATES is true. */
5446 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5447 struct iv_use
*use
, bool important_candidates
)
5449 struct cost_pair
*best_cp
= NULL
, *cp
;
5454 gcc_assert (ivs
->upto
>= use
->id
);
5456 if (ivs
->upto
== use
->id
)
5462 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5463 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5465 struct iv_cand
*cand
= iv_cand (data
, i
);
5467 cp
= get_use_iv_cost (data
, use
, cand
);
5469 if (cheaper_cost_pair (cp
, best_cp
))
5473 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5476 /* Get cost for assignment IVS. */
5479 iv_ca_cost (struct iv_ca
*ivs
)
5481 /* This was a conditional expression but it triggered a bug in
5484 return infinite_cost
;
5489 /* Returns true if all dependences of CP are among invariants in IVS. */
5492 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5497 if (!cp
->depends_on
)
5500 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5502 if (ivs
->n_invariant_uses
[i
] == 0)
5509 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5510 it before NEXT_CHANGE. */
5512 static struct iv_ca_delta
*
5513 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5514 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5516 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5519 change
->old_cp
= old_cp
;
5520 change
->new_cp
= new_cp
;
5521 change
->next_change
= next_change
;
5526 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5529 static struct iv_ca_delta
*
5530 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5532 struct iv_ca_delta
*last
;
5540 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5542 last
->next_change
= l2
;
5547 /* Reverse the list of changes DELTA, forming the inverse to it. */
5549 static struct iv_ca_delta
*
5550 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5552 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5553 struct cost_pair
*tmp
;
5555 for (act
= delta
; act
; act
= next
)
5557 next
= act
->next_change
;
5558 act
->next_change
= prev
;
5562 act
->old_cp
= act
->new_cp
;
5569 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5570 reverted instead. */
5573 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5574 struct iv_ca_delta
*delta
, bool forward
)
5576 struct cost_pair
*from
, *to
;
5577 struct iv_ca_delta
*act
;
5580 delta
= iv_ca_delta_reverse (delta
);
5582 for (act
= delta
; act
; act
= act
->next_change
)
5586 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5587 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5591 iv_ca_delta_reverse (delta
);
5594 /* Returns true if CAND is used in IVS. */
5597 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5599 return ivs
->n_cand_uses
[cand
->id
] > 0;
5602 /* Returns number of induction variable candidates in the set IVS. */
5605 iv_ca_n_cands (struct iv_ca
*ivs
)
5607 return ivs
->n_cands
;
5610 /* Free the list of changes DELTA. */
5613 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5615 struct iv_ca_delta
*act
, *next
;
5617 for (act
= *delta
; act
; act
= next
)
5619 next
= act
->next_change
;
5626 /* Allocates new iv candidates assignment. */
5628 static struct iv_ca
*
5629 iv_ca_new (struct ivopts_data
*data
)
5631 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5635 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5636 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5637 nw
->cands
= BITMAP_ALLOC (NULL
);
5640 nw
->cand_use_cost
= no_cost
;
5642 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5644 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5645 nw
->num_used_inv_expr
= 0;
5650 /* Free memory occupied by the set IVS. */
5653 iv_ca_free (struct iv_ca
**ivs
)
5655 free ((*ivs
)->cand_for_use
);
5656 free ((*ivs
)->n_cand_uses
);
5657 BITMAP_FREE ((*ivs
)->cands
);
5658 free ((*ivs
)->n_invariant_uses
);
5659 free ((*ivs
)->used_inv_expr
);
5664 /* Dumps IVS to FILE. */
5667 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5669 const char *pref
= " invariants ";
5671 comp_cost cost
= iv_ca_cost (ivs
);
5673 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5674 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5675 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5676 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5678 for (i
= 0; i
< ivs
->upto
; i
++)
5680 struct iv_use
*use
= iv_use (data
, i
);
5681 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5683 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5684 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5686 fprintf (file
, " use:%d --> ??\n", use
->id
);
5689 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5690 if (ivs
->n_invariant_uses
[i
])
5692 fprintf (file
, "%s%d", pref
, i
);
5695 fprintf (file
, "\n\n");
5698 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5699 new set, and store differences in DELTA. Number of induction variables
5700 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5701 the function will try to find a solution with mimimal iv candidates. */
5704 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5705 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5706 unsigned *n_ivs
, bool min_ncand
)
5711 struct cost_pair
*old_cp
, *new_cp
;
5714 for (i
= 0; i
< ivs
->upto
; i
++)
5716 use
= iv_use (data
, i
);
5717 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5720 && old_cp
->cand
== cand
)
5723 new_cp
= get_use_iv_cost (data
, use
, cand
);
5727 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5730 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5733 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5736 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5737 cost
= iv_ca_cost (ivs
);
5739 *n_ivs
= iv_ca_n_cands (ivs
);
5740 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5745 /* Try narrowing set IVS by removing CAND. Return the cost of
5746 the new set and store the differences in DELTA. START is
5747 the candidate with which we start narrowing. */
5750 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5751 struct iv_cand
*cand
, struct iv_cand
*start
,
5752 struct iv_ca_delta
**delta
)
5756 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5758 struct iv_cand
*cnd
;
5759 comp_cost cost
, best_cost
, acost
;
5762 for (i
= 0; i
< n_iv_uses (data
); i
++)
5764 use
= iv_use (data
, i
);
5766 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5767 if (old_cp
->cand
!= cand
)
5770 best_cost
= iv_ca_cost (ivs
);
5771 /* Start narrowing with START. */
5772 new_cp
= get_use_iv_cost (data
, use
, start
);
5774 if (data
->consider_all_candidates
)
5776 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5778 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5781 cnd
= iv_cand (data
, ci
);
5783 cp
= get_use_iv_cost (data
, use
, cnd
);
5787 iv_ca_set_cp (data
, ivs
, use
, cp
);
5788 acost
= iv_ca_cost (ivs
);
5790 if (compare_costs (acost
, best_cost
) < 0)
5799 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5801 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5804 cnd
= iv_cand (data
, ci
);
5806 cp
= get_use_iv_cost (data
, use
, cnd
);
5810 iv_ca_set_cp (data
, ivs
, use
, cp
);
5811 acost
= iv_ca_cost (ivs
);
5813 if (compare_costs (acost
, best_cost
) < 0)
5820 /* Restore to old cp for use. */
5821 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5825 iv_ca_delta_free (delta
);
5826 return infinite_cost
;
5829 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5832 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5833 cost
= iv_ca_cost (ivs
);
5834 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5839 /* Try optimizing the set of candidates IVS by removing candidates different
5840 from to EXCEPT_CAND from it. Return cost of the new set, and store
5841 differences in DELTA. */
5844 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5845 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5848 struct iv_ca_delta
*act_delta
, *best_delta
;
5850 comp_cost best_cost
, acost
;
5851 struct iv_cand
*cand
;
5854 best_cost
= iv_ca_cost (ivs
);
5856 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5858 cand
= iv_cand (data
, i
);
5860 if (cand
== except_cand
)
5863 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5865 if (compare_costs (acost
, best_cost
) < 0)
5868 iv_ca_delta_free (&best_delta
);
5869 best_delta
= act_delta
;
5872 iv_ca_delta_free (&act_delta
);
5881 /* Recurse to possibly remove other unnecessary ivs. */
5882 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5883 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5884 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5885 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5889 /* Tries to extend the sets IVS in the best possible way in order
5890 to express the USE. If ORIGINALP is true, prefer candidates from
5891 the original set of IVs, otherwise favor important candidates not
5892 based on any memory object. */
5895 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5896 struct iv_use
*use
, bool originalp
)
5898 comp_cost best_cost
, act_cost
;
5901 struct iv_cand
*cand
;
5902 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5903 struct cost_pair
*cp
;
5905 iv_ca_add_use (data
, ivs
, use
, false);
5906 best_cost
= iv_ca_cost (ivs
);
5908 cp
= iv_ca_cand_for_use (ivs
, use
);
5913 iv_ca_add_use (data
, ivs
, use
, true);
5914 best_cost
= iv_ca_cost (ivs
);
5915 cp
= iv_ca_cand_for_use (ivs
, use
);
5919 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5920 iv_ca_set_no_cp (data
, ivs
, use
);
5923 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5924 first try important candidates not based on any memory object. Only if
5925 this fails, try the specific ones. Rationale -- in loops with many
5926 variables the best choice often is to use just one generic biv. If we
5927 added here many ivs specific to the uses, the optimization algorithm later
5928 would be likely to get stuck in a local minimum, thus causing us to create
5929 too many ivs. The approach from few ivs to more seems more likely to be
5930 successful -- starting from few ivs, replacing an expensive use by a
5931 specific iv should always be a win. */
5932 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5934 cand
= iv_cand (data
, i
);
5936 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5939 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5942 if (iv_ca_cand_used_p (ivs
, cand
))
5945 cp
= get_use_iv_cost (data
, use
, cand
);
5949 iv_ca_set_cp (data
, ivs
, use
, cp
);
5950 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5952 iv_ca_set_no_cp (data
, ivs
, use
);
5953 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5955 if (compare_costs (act_cost
, best_cost
) < 0)
5957 best_cost
= act_cost
;
5959 iv_ca_delta_free (&best_delta
);
5960 best_delta
= act_delta
;
5963 iv_ca_delta_free (&act_delta
);
5966 if (infinite_cost_p (best_cost
))
5968 for (i
= 0; i
< use
->n_map_members
; i
++)
5970 cp
= use
->cost_map
+ i
;
5975 /* Already tried this. */
5976 if (cand
->important
)
5978 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5980 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5984 if (iv_ca_cand_used_p (ivs
, cand
))
5988 iv_ca_set_cp (data
, ivs
, use
, cp
);
5989 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5990 iv_ca_set_no_cp (data
, ivs
, use
);
5991 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5994 if (compare_costs (act_cost
, best_cost
) < 0)
5996 best_cost
= act_cost
;
5999 iv_ca_delta_free (&best_delta
);
6000 best_delta
= act_delta
;
6003 iv_ca_delta_free (&act_delta
);
6007 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6008 iv_ca_delta_free (&best_delta
);
6010 return !infinite_cost_p (best_cost
);
6013 /* Finds an initial assignment of candidates to uses. */
6015 static struct iv_ca
*
6016 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6018 struct iv_ca
*ivs
= iv_ca_new (data
);
6021 for (i
= 0; i
< n_iv_uses (data
); i
++)
6022 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6031 /* Tries to improve set of induction variables IVS. */
6034 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
6037 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6038 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6039 struct iv_cand
*cand
;
6041 /* Try extending the set of induction variables by one. */
6042 for (i
= 0; i
< n_iv_cands (data
); i
++)
6044 cand
= iv_cand (data
, i
);
6046 if (iv_ca_cand_used_p (ivs
, cand
))
6049 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6053 /* If we successfully added the candidate and the set is small enough,
6054 try optimizing it by removing other candidates. */
6055 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6057 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6058 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6059 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6060 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6063 if (compare_costs (acost
, best_cost
) < 0)
6066 iv_ca_delta_free (&best_delta
);
6067 best_delta
= act_delta
;
6070 iv_ca_delta_free (&act_delta
);
6075 /* Try removing the candidates from the set instead. */
6076 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6078 /* Nothing more we can do. */
6083 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6084 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6085 iv_ca_delta_free (&best_delta
);
6089 /* Attempts to find the optimal set of induction variables. We do simple
6090 greedy heuristic -- we try to replace at most one candidate in the selected
6091 solution and remove the unused ivs while this improves the cost. */
6093 static struct iv_ca
*
6094 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6098 /* Get the initial solution. */
6099 set
= get_initial_solution (data
, originalp
);
6102 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6103 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6107 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6109 fprintf (dump_file
, "Initial set of candidates:\n");
6110 iv_ca_dump (data
, dump_file
, set
);
6113 while (try_improve_iv_set (data
, set
))
6115 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6117 fprintf (dump_file
, "Improved to:\n");
6118 iv_ca_dump (data
, dump_file
, set
);
6125 static struct iv_ca
*
6126 find_optimal_iv_set (struct ivopts_data
*data
)
6129 struct iv_ca
*set
, *origset
;
6131 comp_cost cost
, origcost
;
6133 /* Determine the cost based on a strategy that starts with original IVs,
6134 and try again using a strategy that prefers candidates not based
6136 origset
= find_optimal_iv_set_1 (data
, true);
6137 set
= find_optimal_iv_set_1 (data
, false);
6139 if (!origset
&& !set
)
6142 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6143 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6147 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6148 origcost
.cost
, origcost
.complexity
);
6149 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6150 cost
.cost
, cost
.complexity
);
6153 /* Choose the one with the best cost. */
6154 if (compare_costs (origcost
, cost
) <= 0)
6161 iv_ca_free (&origset
);
6163 for (i
= 0; i
< n_iv_uses (data
); i
++)
6165 use
= iv_use (data
, i
);
6166 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6172 /* Creates a new induction variable corresponding to CAND. */
6175 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6177 gimple_stmt_iterator incr_pos
;
6187 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6191 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6199 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6203 /* Mark that the iv is preserved. */
6204 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6205 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6207 /* Rewrite the increment so that it uses var_before directly. */
6208 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6212 gimple_add_tmp_var (cand
->var_before
);
6214 base
= unshare_expr (cand
->iv
->base
);
6216 create_iv (base
, unshare_expr (cand
->iv
->step
),
6217 cand
->var_before
, data
->current_loop
,
6218 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6221 /* Creates new induction variables described in SET. */
6224 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6227 struct iv_cand
*cand
;
6230 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6232 cand
= iv_cand (data
, i
);
6233 create_new_iv (data
, cand
);
6236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6238 fprintf (dump_file
, "\nSelected IV set: \n");
6239 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6241 cand
= iv_cand (data
, i
);
6242 dump_cand (dump_file
, cand
);
6244 fprintf (dump_file
, "\n");
6248 /* Rewrites USE (definition of iv used in a nonlinear expression)
6249 using candidate CAND. */
6252 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6253 struct iv_use
*use
, struct iv_cand
*cand
)
6258 gimple_stmt_iterator bsi
;
6260 /* An important special case -- if we are asked to express value of
6261 the original iv by itself, just exit; there is no need to
6262 introduce a new computation (that might also need casting the
6263 variable to unsigned and back). */
6264 if (cand
->pos
== IP_ORIGINAL
6265 && cand
->incremented_at
== use
->stmt
)
6267 enum tree_code stmt_code
;
6269 gcc_assert (is_gimple_assign (use
->stmt
));
6270 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6272 /* Check whether we may leave the computation unchanged.
6273 This is the case only if it does not rely on other
6274 computations in the loop -- otherwise, the computation
6275 we rely upon may be removed in remove_unused_ivs,
6276 thus leading to ICE. */
6277 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6278 if (stmt_code
== PLUS_EXPR
6279 || stmt_code
== MINUS_EXPR
6280 || stmt_code
== POINTER_PLUS_EXPR
)
6282 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6283 op
= gimple_assign_rhs2 (use
->stmt
);
6284 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6285 op
= gimple_assign_rhs1 (use
->stmt
);
6292 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6296 comp
= get_computation (data
->current_loop
, use
, cand
);
6297 gcc_assert (comp
!= NULL_TREE
);
6299 switch (gimple_code (use
->stmt
))
6302 tgt
= PHI_RESULT (use
->stmt
);
6304 /* If we should keep the biv, do not replace it. */
6305 if (name_info (data
, tgt
)->preserve_biv
)
6308 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6312 tgt
= gimple_assign_lhs (use
->stmt
);
6313 bsi
= gsi_for_stmt (use
->stmt
);
6320 if (!valid_gimple_rhs_p (comp
)
6321 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6322 /* We can't allow re-allocating the stmt as it might be pointed
6324 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6325 >= gimple_num_ops (gsi_stmt (bsi
)))))
6327 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6328 true, GSI_SAME_STMT
);
6329 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6331 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6332 /* As this isn't a plain copy we have to reset alignment
6334 if (SSA_NAME_PTR_INFO (comp
))
6335 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6339 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6341 ass
= gimple_build_assign (tgt
, comp
);
6342 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6344 bsi
= gsi_for_stmt (use
->stmt
);
6345 remove_phi_node (&bsi
, false);
6349 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6350 use
->stmt
= gsi_stmt (bsi
);
6354 /* Performs a peephole optimization to reorder the iv update statement with
6355 a mem ref to enable instruction combining in later phases. The mem ref uses
6356 the iv value before the update, so the reordering transformation requires
6357 adjustment of the offset. CAND is the selected IV_CAND.
6361 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6369 directly propagating t over to (1) will introduce overlapping live range
6370 thus increase register pressure. This peephole transform it into:
6374 t = MEM_REF (base, iv2, 8, 8);
6381 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6384 gimple iv_update
, stmt
;
6386 gimple_stmt_iterator gsi
, gsi_iv
;
6388 if (cand
->pos
!= IP_NORMAL
)
6391 var_after
= cand
->var_after
;
6392 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6394 bb
= gimple_bb (iv_update
);
6395 gsi
= gsi_last_nondebug_bb (bb
);
6396 stmt
= gsi_stmt (gsi
);
6398 /* Only handle conditional statement for now. */
6399 if (gimple_code (stmt
) != GIMPLE_COND
)
6402 gsi_prev_nondebug (&gsi
);
6403 stmt
= gsi_stmt (gsi
);
6404 if (stmt
!= iv_update
)
6407 gsi_prev_nondebug (&gsi
);
6408 if (gsi_end_p (gsi
))
6411 stmt
= gsi_stmt (gsi
);
6412 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6415 if (stmt
!= use
->stmt
)
6418 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6421 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6423 fprintf (dump_file
, "Reordering \n");
6424 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6425 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6426 fprintf (dump_file
, "\n");
6429 gsi
= gsi_for_stmt (use
->stmt
);
6430 gsi_iv
= gsi_for_stmt (iv_update
);
6431 gsi_move_before (&gsi_iv
, &gsi
);
6433 cand
->pos
= IP_BEFORE_USE
;
6434 cand
->incremented_at
= use
->stmt
;
6437 /* Rewrites USE (address that is an iv) using candidate CAND. */
6440 rewrite_use_address (struct ivopts_data
*data
,
6441 struct iv_use
*use
, struct iv_cand
*cand
)
6444 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6445 tree base_hint
= NULL_TREE
;
6449 adjust_iv_update_pos (cand
, use
);
6450 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6452 unshare_aff_combination (&aff
);
6454 /* To avoid undefined overflow problems, all IV candidates use unsigned
6455 integer types. The drawback is that this makes it impossible for
6456 create_mem_ref to distinguish an IV that is based on a memory object
6457 from one that represents simply an offset.
6459 To work around this problem, we pass a hint to create_mem_ref that
6460 indicates which variable (if any) in aff is an IV based on a memory
6461 object. Note that we only consider the candidate. If this is not
6462 based on an object, the base of the reference is in some subexpression
6463 of the use -- but these will use pointer types, so they are recognized
6464 by the create_mem_ref heuristics anyway. */
6465 if (cand
->iv
->base_object
)
6466 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6468 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6469 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6470 reference_alias_ptr_type (*use
->op_p
),
6471 iv
, base_hint
, data
->speed
);
6472 copy_ref_info (ref
, *use
->op_p
);
6476 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6480 rewrite_use_compare (struct ivopts_data
*data
,
6481 struct iv_use
*use
, struct iv_cand
*cand
)
6483 tree comp
, *var_p
, op
, bound
;
6484 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6485 enum tree_code compare
;
6486 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6492 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6493 tree var_type
= TREE_TYPE (var
);
6496 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6498 fprintf (dump_file
, "Replacing exit test: ");
6499 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6502 bound
= unshare_expr (fold_convert (var_type
, bound
));
6503 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6505 gsi_insert_seq_on_edge_immediate (
6506 loop_preheader_edge (data
->current_loop
),
6509 gimple_cond_set_lhs (use
->stmt
, var
);
6510 gimple_cond_set_code (use
->stmt
, compare
);
6511 gimple_cond_set_rhs (use
->stmt
, op
);
6515 /* The induction variable elimination failed; just express the original
6517 comp
= get_computation (data
->current_loop
, use
, cand
);
6518 gcc_assert (comp
!= NULL_TREE
);
6520 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6523 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6524 true, GSI_SAME_STMT
);
6527 /* Rewrites USE using candidate CAND. */
6530 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6534 case USE_NONLINEAR_EXPR
:
6535 rewrite_use_nonlinear_expr (data
, use
, cand
);
6539 rewrite_use_address (data
, use
, cand
);
6543 rewrite_use_compare (data
, use
, cand
);
6550 update_stmt (use
->stmt
);
6553 /* Rewrite the uses using the selected induction variables. */
6556 rewrite_uses (struct ivopts_data
*data
)
6559 struct iv_cand
*cand
;
6562 for (i
= 0; i
< n_iv_uses (data
); i
++)
6564 use
= iv_use (data
, i
);
6565 cand
= use
->selected
;
6568 rewrite_use (data
, use
, cand
);
6572 /* Removes the ivs that are not used after rewriting. */
6575 remove_unused_ivs (struct ivopts_data
*data
)
6579 bitmap toremove
= BITMAP_ALLOC (NULL
);
6581 /* Figure out an order in which to release SSA DEFs so that we don't
6582 release something that we'd have to propagate into a debug stmt
6584 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6586 struct version_info
*info
;
6588 info
= ver_info (data
, j
);
6590 && !integer_zerop (info
->iv
->step
)
6592 && !info
->iv
->have_use_for
6593 && !info
->preserve_biv
)
6595 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6597 tree def
= info
->iv
->ssa_name
;
6599 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6601 imm_use_iterator imm_iter
;
6602 use_operand_p use_p
;
6606 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6608 if (!gimple_debug_bind_p (stmt
))
6611 /* We just want to determine whether to do nothing
6612 (count == 0), to substitute the computed
6613 expression into a single use of the SSA DEF by
6614 itself (count == 1), or to use a debug temp
6615 because the SSA DEF is used multiple times or as
6616 part of a larger expression (count > 1). */
6618 if (gimple_debug_bind_get_value (stmt
) != def
)
6622 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6628 struct iv_use dummy_use
;
6629 struct iv_cand
*best_cand
= NULL
, *cand
;
6630 unsigned i
, best_pref
= 0, cand_pref
;
6632 memset (&dummy_use
, 0, sizeof (dummy_use
));
6633 dummy_use
.iv
= info
->iv
;
6634 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6636 cand
= iv_use (data
, i
)->selected
;
6637 if (cand
== best_cand
)
6639 cand_pref
= operand_equal_p (cand
->iv
->step
,
6643 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6644 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6647 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6649 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6652 best_pref
= cand_pref
;
6659 tree comp
= get_computation_at (data
->current_loop
,
6660 &dummy_use
, best_cand
,
6661 SSA_NAME_DEF_STMT (def
));
6667 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6668 DECL_ARTIFICIAL (vexpr
) = 1;
6669 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6670 if (SSA_NAME_VAR (def
))
6671 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6673 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6674 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6675 gimple_stmt_iterator gsi
;
6677 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6678 gsi
= gsi_after_labels (gimple_bb
6679 (SSA_NAME_DEF_STMT (def
)));
6681 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6683 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6687 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6689 if (!gimple_debug_bind_p (stmt
))
6692 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6693 SET_USE (use_p
, comp
);
6701 release_defs_bitset (toremove
);
6703 BITMAP_FREE (toremove
);
6706 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6707 for pointer_map_traverse. */
6710 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6711 void *data ATTRIBUTE_UNUSED
)
6713 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6719 /* Frees data allocated by the optimization of a single loop. */
6722 free_loop_data (struct ivopts_data
*data
)
6730 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6731 pointer_map_destroy (data
->niters
);
6732 data
->niters
= NULL
;
6735 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6737 struct version_info
*info
;
6739 info
= ver_info (data
, i
);
6742 info
->has_nonlin_use
= false;
6743 info
->preserve_biv
= false;
6746 bitmap_clear (data
->relevant
);
6747 bitmap_clear (data
->important_candidates
);
6749 for (i
= 0; i
< n_iv_uses (data
); i
++)
6751 struct iv_use
*use
= iv_use (data
, i
);
6754 BITMAP_FREE (use
->related_cands
);
6755 for (j
= 0; j
< use
->n_map_members
; j
++)
6756 if (use
->cost_map
[j
].depends_on
)
6757 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6758 free (use
->cost_map
);
6761 data
->iv_uses
.truncate (0);
6763 for (i
= 0; i
< n_iv_cands (data
); i
++)
6765 struct iv_cand
*cand
= iv_cand (data
, i
);
6768 if (cand
->depends_on
)
6769 BITMAP_FREE (cand
->depends_on
);
6772 data
->iv_candidates
.truncate (0);
6774 if (data
->version_info_size
< num_ssa_names
)
6776 data
->version_info_size
= 2 * num_ssa_names
;
6777 free (data
->version_info
);
6778 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6781 data
->max_inv_id
= 0;
6783 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6784 SET_DECL_RTL (obj
, NULL_RTX
);
6786 decl_rtl_to_reset
.truncate (0);
6788 data
->inv_expr_tab
->empty ();
6789 data
->inv_expr_id
= 0;
6792 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6796 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6798 free_loop_data (data
);
6799 free (data
->version_info
);
6800 BITMAP_FREE (data
->relevant
);
6801 BITMAP_FREE (data
->important_candidates
);
6803 decl_rtl_to_reset
.release ();
6804 data
->iv_uses
.release ();
6805 data
->iv_candidates
.release ();
6806 delete data
->inv_expr_tab
;
6807 data
->inv_expr_tab
= NULL
;
6810 /* Returns true if the loop body BODY includes any function calls. */
6813 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6815 gimple_stmt_iterator gsi
;
6818 for (i
= 0; i
< num_nodes
; i
++)
6819 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6821 gimple stmt
= gsi_stmt (gsi
);
6822 if (is_gimple_call (stmt
)
6823 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6829 /* Optimizes the LOOP. Returns true if anything changed. */
6832 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6834 bool changed
= false;
6835 struct iv_ca
*iv_ca
;
6836 edge exit
= single_dom_exit (loop
);
6839 gcc_assert (!data
->niters
);
6840 data
->current_loop
= loop
;
6841 data
->speed
= optimize_loop_for_speed_p (loop
);
6843 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6845 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6849 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6850 exit
->src
->index
, exit
->dest
->index
);
6851 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6852 fprintf (dump_file
, "\n");
6855 fprintf (dump_file
, "\n");
6858 body
= get_loop_body (loop
);
6859 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6860 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6863 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6865 /* For each ssa name determines whether it behaves as an induction variable
6867 if (!find_induction_variables (data
))
6870 /* Finds interesting uses (item 1). */
6871 find_interesting_uses (data
);
6872 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6875 /* Finds candidates for the induction variables (item 2). */
6876 find_iv_candidates (data
);
6878 /* Calculates the costs (item 3, part 1). */
6879 determine_iv_costs (data
);
6880 determine_use_iv_costs (data
);
6881 determine_set_costs (data
);
6883 /* Find the optimal set of induction variables (item 3, part 2). */
6884 iv_ca
= find_optimal_iv_set (data
);
6889 /* Create the new induction variables (item 4, part 1). */
6890 create_new_ivs (data
, iv_ca
);
6891 iv_ca_free (&iv_ca
);
6893 /* Rewrite the uses (item 4, part 2). */
6894 rewrite_uses (data
);
6896 /* Remove the ivs that are unused after rewriting. */
6897 remove_unused_ivs (data
);
6899 /* We have changed the structure of induction variables; it might happen
6900 that definitions in the scev database refer to some of them that were
6905 free_loop_data (data
);
6910 /* Main entry point. Optimizes induction variables in loops. */
6913 tree_ssa_iv_optimize (void)
6916 struct ivopts_data data
;
6918 tree_ssa_iv_optimize_init (&data
);
6920 /* Optimize the loops starting with the innermost ones. */
6921 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
6923 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6924 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6926 tree_ssa_iv_optimize_loop (&data
, loop
);
6929 tree_ssa_iv_optimize_finalize (&data
);