/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
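
/* As a motivating example (a hand-written sketch, not actual output of
   this pass), strength reduction rewrites

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the per-iteration address computation &a + 4 * i (assuming
   4-byte elements) is replaced by a pointer candidate that is simply
   incremented:

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   after which the original variable i can be eliminated if it has no
   other uses.  */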
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
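
/* For instance (an illustrative sketch, not actual target numbers): on a
   target where a reg+reg address is as fast as a plain reg address but
   occupies an extra register, expressing a use as [base + iv] might be
   assigned {cost = 0, complexity = 1}, while first computing base + iv
   into a register might get {cost = 1, complexity = 0}; ties on COST are
   broken in favor of the lower COMPLEXITY.  */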
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
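
/* As an illustration (a hand-written sketch, not pass output): for

     for (i = 0; i < n; i++)
       body;

   an IP_NORMAL candidate is incremented in the loop body just before the
   exit test (i' = i + step; if (i' < n) goto top;), so the new value is
   already available to the test, whereas an IP_END candidate is
   incremented on the latch, after the exit test has used the old value.  */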
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
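
/* For instance (illustrative only): applied to &a[i] this yields &a,
   applied to p + 4 it yields whatever the base object of p is, and
   applied to an integer constant it yields NULL_TREE, meaning the value
   is known not to point into any object.  */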
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
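
/* For example (an illustrative GIMPLE sketch), given the loop-header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   with i_2 = i_1 + 4 in the loop body, determine_biv_step returns 4;
   for a phi whose value does not advance by a loop-invariant step it
   returns NULL_TREE.  */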
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
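
/* For example (illustrative): constant_multiple_of (4 * i, i, &mul)
   sets MUL to 4; constant_multiple_of (i + j, i, &mul) fails because j
   is not a constant multiple of i; constant_multiple_of (8, 2, &mul)
   sets MUL to 4 since the division is exact.  */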
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
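
/* For instance (illustrative): strip_offset (&a[4], &off) returns &a[0]
   with OFF set to 4 times the element size, so that a candidate based on
   the stripped core can serve several uses that differ only in their
   constant offset.  */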
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
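
/* E.g. (illustrative): for "int" this returns "unsigned int", for
   "unsigned int" the type itself, and for a pointer type an unsigned
   integer type of the same precision, so that candidate arithmetic never
   invokes undefined signed overflow.  */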
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
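
/* For example (illustrative and target-dependent): on a target with
   post-increment addressing, a load from *p followed by p = p + 4, where
   4 equals the GET_MODE_SIZE of the access, can be combined into a single
   "load *p++" by giving the use an IP_AFTER_USE candidate.  */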
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
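/* For example (illustrative; the exact set depends on the target's type
   precisions and BITS_PER_WORD): on an LP64 target with 32-bit int and
   64-bit long, the calls above register the candidates {base 0, step 1} in
   both int and long, and the "long long" candidate is skipped because its
   precision equals that of long.  */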
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = bitmap_count_bits (use->related_cands);

          /* Round up to a power of two, so that the modulus can be
             computed with a single bitwise AND.  */
          size = s ? (1 << ceil_log2 (s)) : 1;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
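/* A minimal sketch of the hashing scheme used by the cost map (illustration
   only; the real lookup code is in set_use_iv_cost/get_use_iv_cost below).
   Because SIZE is a power of two, "id % size" reduces to a single AND:

     static unsigned
     cost_map_slot (unsigned id, unsigned size)
     {
       return id & (size - 1);
     }

   E.g. with s == 5 related candidates, size is rounded up to 8, and a
   candidate with id 13 hashes to slot 13 & 7 == 5 == 13 % 8.  */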
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
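/* Example of the resulting ordering (illustrative values): {cost 4,
   complexity 2} compares less than {cost 5, complexity 0} because the
   runtime cost is compared first, while {cost 4, complexity 1} compares
   less than {cost 4, complexity 2} because complexity only breaks ties.  */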
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
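/* Illustration of the probing scheme above (hypothetical values): with
   n_map_members == 8 and cand->id == 13, the search starts at slot
   13 & 7 == 5, walks slots 5, 6, 7, then wraps around to 0 .. 4, and can
   stop at the first empty slot because set_use_iv_cost never leaves a gap
   in front of an inserted entry.  */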
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
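/* A worked instance of the rewrite above (illustrative numbers): for
   use->iv = {base 0, step 4} and cand->iv = {base 8, step 4} we get
   ratio == 1, so

     use = ubase - ratio * cbase + ratio * var = 0 - 8 + var = var - 8,

   i.e. the use is recomputed from the candidate with a single
   subtraction of a loop-invariant constant.  */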
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
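/* For instance (illustrative numbers): a setup cost of 10 in a loop with
   avg_loop_niter == 5 is charged as 10 / 5 == 2 per iteration when
   optimizing for speed, but stays 10 when optimizing for size, since the
   setup code is emitted exactly once either way.  */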
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
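/* As an example (target-specific, so only an illustration): on x86-64 the
   probe above typically accepts the scales 1, 2, 4 and 8, matching the
   base + index * scale addressing modes, so a call like

     multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC)

   would return true, while a ratio of 3 would be rejected and the scaled
   index would need an explicit multiply outside the address.  */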
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.

         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }

          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
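/* The 16 address-shape combinations above are decoded from the loop index I
   as illustrated below (I == 13 == 0b1101 shown; illustration only):

     sym_p = 13 & 1        == 1   symbol present
     var_p = (13 >> 1) & 1 == 0   no second index register
     off_p = (13 >> 2) & 1 == 1   constant offset present
     rat_p = (13 >> 3) & 1 == 1   scaled index

   i.e. that iteration measures the cost of a "sym + cst + rat * index"
   address.  */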
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (equal_p
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
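/* Example (illustrative): for EXPR = a + b * 8, MULT = b * 8 gives
   m = exact_log2 (8) == 3, so the expression can be costed as a single
   shift-and-add "(b << 3) + a" via shiftadd_cost instead of a full
   multiply followed by an add.  */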
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i) + 1;

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab.find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
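/* In other words, VAL = base + (niter + after) * step, where "after" is 1
   when AT follows the biv increment and 0 otherwise.  E.g. (illustrative
   numbers) for base 16, step 4 and niter 10, the value is 56 before the
   increment and 60 after it.  */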
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step - 1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1

   */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
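/* Worked example (illustrative): for an 8-bit unsigned iv with step 4,
   num_ending_zeros (4) == 2, so the period is a mask of 8 - 2 == 6 low
   bits, i.e. 63: after 63 increments the iv has advanced by 63 * 4 == 252
   and the next increment wraps back to the initial value, so the iv takes
   distinct values for exactly period + 1 consecutive iterations.  */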
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Strips type conversions from EXP that do not change whether the type
   wraps on overflow, so that the overflow reasoning below remains valid
   for the stripped expression.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);
  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

         Also, we only look for the first addition in the computation of BASE.
         More complex analysis would be better, but introducing it just for
         this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
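/* For example (illustrative GIMPLE): if BASE is p_0 defined as

     p_0 = base + a;     <-- POINTER_PLUS_EXPR in a non-wrapping type

   then difference_cannot_overflow_p (p_0, a) returns true, because
   p_0 - a merely recomputes "base" and cannot wrap; this is exactly the
   fact that iv_elimination_compare_lt below relies on.  */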
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
4624 /* Check whether it is possible to express the condition in USE by comparison
4625 of candidate CAND. If so, store the value compared with to BOUND, and the
4626 comparison operator to COMP. */
4629 may_eliminate_iv (struct ivopts_data
*data
,
4630 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4631 enum tree_code
*comp
)
4636 struct loop
*loop
= data
->current_loop
;
4638 struct tree_niter_desc
*desc
= NULL
;
4640 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4643 /* For now works only for exits that dominate the loop latch.
4644 TODO: extend to other conditions inside loop body. */
4645 ex_bb
= gimple_bb (use
->stmt
);
4646 if (use
->stmt
!= last_stmt (ex_bb
)
4647 || gimple_code (use
->stmt
) != GIMPLE_COND
4648 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4651 exit
= EDGE_SUCC (ex_bb
, 0);
4652 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4653 exit
= EDGE_SUCC (ex_bb
, 1);
4654 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4657 desc
= niter_for_exit (data
, exit
);
  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);
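
  /* For example (a sketch): a candidate with step 1 in an unsigned 32-bit
     type only repeats a value after 0xffffffff increments, so its period
     allows it to replace the exit test of any loop iterating fewer times
     than that; a larger power-of-two step shortens the period, since the
     candidate wraps around to a previously seen value sooner.  */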
  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
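
/* An illustrative sketch of the situation handled just above: in

     for (i = a; i != b; i++)
       body;

   where b may already equal a on entry, an eliminated exit test must not
   use != on the replacement variable; rewriting the condition to use <
   (or > for a decreasing candidate) in iv_elimination_compare_lt keeps
   it correct even when the loop runs zero times.  */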
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d",
                         use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a save.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);
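
  /* Note that adjust_setup_cost amortizes the initialization cost over
     the expected number of iterations when optimizing for speed, so for
     a loop that rolls enough the per-iteration step cost dominates; e.g.
     (an illustration) a setup cost of 4 amortized over an average of 10
     iterations contributes 4 / 10 == 0 here.  */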
  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs
                                            + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
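
/* The search below applies these deltas in a common pattern (a sketch):

     acost = iv_ca_extend (data, ivs, cand, &delta, &n_ivs, false);
       -- IVS is unchanged here; DELTA describes the proposed change
     iv_ca_delta_commit (data, ivs, delta, true);
       -- apply the change to IVS
     iv_ca_delta_commit (data, ivs, delta, false);
       -- revert it again
     iv_ca_delta_free (&delta);

   which lets the cost of a change be measured without copying the whole
   assignment.  */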
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
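
  /* An illustrative example: in a loop such as

       for (i = 0; i < n; i++)
         a[i] = b[i];

     one generic biv can serve both array references, whereas seeding the
     set with separate address ivs for &a[i] and &b[i] would make it much
     harder for the local search to converge back to the one-iv
     solution.  */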
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
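
  /* A hypothetical illustration: if the candidate is a pointer iv with
     base_object 'p', its value at USE->stmt is passed as the hint so that
     create_mem_ref can build a reference of the form [p + offset] instead
     of treating the whole affine combination as an integer address
     computation.  */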
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
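
/* For example (a sketch): an original exit test  if (i_1 != n_2)  on an
   eliminable iv may be rewritten by the code above into  if (p_3 < bound),
   where p_3 is the candidate's value at the exit statement and BOUND was
   computed by may_eliminate_iv.  */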
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}