1 /* Induction variable optimizations.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
74 #include "gimple-iterator.h"
75 #include "gimplify-me.h"
76 #include "gimple-ssa.h"
79 #include "tree-phinodes.h"
80 #include "ssa-iterators.h"
81 #include "tree-ssanames.h"
82 #include "tree-ssa-loop-ivopts.h"
83 #include "tree-ssa-loop-manip.h"
84 #include "tree-ssa-loop-niter.h"
85 #include "tree-ssa-loop.h"
89 #include "tree-pass.h"
91 #include "insn-config.h"
92 #include "pointer-set.h"
93 #include "hash-table.h"
94 #include "tree-chrec.h"
95 #include "tree-scalar-evolution.h"
98 #include "langhooks.h"
99 #include "tree-affine.h"
101 #include "tree-inline.h"
102 #include "tree-ssa-propagate.h"
104 #include "tree-ssa-address.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
112 /* The infinite cost. */
113 #define INFTY 10000000
115 #define AVG_LOOP_NITER(LOOP) 5
117 /* Returns the expected number of loop iterations for LOOP.
118 The average trip count is computed from profile data if it
121 static inline HOST_WIDE_INT
122 avg_loop_niter (struct loop
*loop
)
124 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
126 return AVG_LOOP_NITER (loop
);
131 /* Representation of the induction variable. */
134 tree base
; /* Initial value of the iv. */
135 tree base_object
; /* A memory object to that the induction variable points. */
136 tree step
; /* Step of the iv (constant only). */
137 tree ssa_name
; /* The ssa name with the value. */
138 bool biv_p
; /* Is it a biv? */
139 bool have_use_for
; /* Do we already have a use for it? */
140 unsigned use_id
; /* The identifier in the use if it is the case. */
143 /* Per-ssa version information (induction variable descriptions, etc.). */
146 tree name
; /* The ssa name. */
147 struct iv
*iv
; /* Induction variable description. */
148 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
149 an expression that is not an induction variable. */
150 bool preserve_biv
; /* For the original biv, whether to preserve it. */
151 unsigned inv_id
; /* Id of an invariant. */
157 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
158 USE_ADDRESS
, /* Use in an address. */
159 USE_COMPARE
/* Use is a compare. */
162 /* Cost of a computation. */
165 int cost
; /* The runtime cost. */
166 unsigned complexity
; /* The estimate of the complexity of the code for
167 the computation (in no concrete units --
168 complexity field should be larger for more
169 complex expressions and addressing modes). */
172 static const comp_cost no_cost
= {0, 0};
173 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
175 /* The candidate - cost pair. */
178 struct iv_cand
*cand
; /* The candidate. */
179 comp_cost cost
; /* The cost. */
180 bitmap depends_on
; /* The list of invariants that have to be
182 tree value
; /* For final value elimination, the expression for
183 the final value of the iv. For iv elimination,
184 the new bound to compare with. */
185 enum tree_code comp
; /* For iv elimination, the comparison. */
186 int inv_expr_id
; /* Loop invariant expression id. */
192 unsigned id
; /* The id of the use. */
193 enum use_type type
; /* Type of the use. */
194 struct iv
*iv
; /* The induction variable it is based on. */
195 gimple stmt
; /* Statement in that it occurs. */
196 tree
*op_p
; /* The place where it occurs. */
197 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
200 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
201 struct cost_pair
*cost_map
;
202 /* The costs wrto the iv candidates. */
204 struct iv_cand
*selected
;
205 /* The selected candidate. */
208 /* The position where the iv is computed. */
211 IP_NORMAL
, /* At the end, just before the exit condition. */
212 IP_END
, /* At the end of the latch block. */
213 IP_BEFORE_USE
, /* Immediately before a specific use. */
214 IP_AFTER_USE
, /* Immediately after a specific use. */
215 IP_ORIGINAL
/* The original biv. */
218 /* The induction variable candidate. */
221 unsigned id
; /* The number of the candidate. */
222 bool important
; /* Whether this is an "important" candidate, i.e. such
223 that it should be considered by all uses. */
224 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
225 gimple incremented_at
;/* For original biv, the statement where it is
227 tree var_before
; /* The variable used for it before increment. */
228 tree var_after
; /* The variable used for it after increment. */
229 struct iv
*iv
; /* The value of the candidate. NULL for
230 "pseudocandidate" used to indicate the possibility
231 to replace the final value of an iv by direct
232 computation of the value. */
233 unsigned cost
; /* Cost of the candidate. */
234 unsigned cost_step
; /* Cost of the candidate's increment operation. */
235 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
236 where it is incremented. */
237 bitmap depends_on
; /* The list of invariants that are used in step of the
241 /* Loop invariant expression hashtable entry. */
242 struct iv_inv_expr_ent
249 /* The data used by the induction variable optimizations. */
251 typedef struct iv_use
*iv_use_p
;
253 typedef struct iv_cand
*iv_cand_p
;
255 /* Hashtable helpers. */
257 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
259 typedef iv_inv_expr_ent value_type
;
260 typedef iv_inv_expr_ent compare_type
;
261 static inline hashval_t
hash (const value_type
*);
262 static inline bool equal (const value_type
*, const compare_type
*);
265 /* Hash function for loop invariant expressions. */
268 iv_inv_expr_hasher::hash (const value_type
*expr
)
273 /* Hash table equality function for expressions. */
276 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
278 return expr1
->hash
== expr2
->hash
279 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
284 /* The currently optimized loop. */
285 struct loop
*current_loop
;
287 /* Numbers of iterations for all exits of the current loop. */
288 struct pointer_map_t
*niters
;
290 /* Number of registers used in it. */
293 /* The size of version_info array allocated. */
294 unsigned version_info_size
;
296 /* The array of information for the ssa names. */
297 struct version_info
*version_info
;
299 /* The hashtable of loop invariant expressions created
301 hash_table
<iv_inv_expr_hasher
> inv_expr_tab
;
303 /* Loop invariant expression id. */
306 /* The bitmap of indices in version_info whose value was changed. */
309 /* The uses of induction variables. */
310 vec
<iv_use_p
> iv_uses
;
312 /* The candidates. */
313 vec
<iv_cand_p
> iv_candidates
;
315 /* A bitmap of important candidates. */
316 bitmap important_candidates
;
318 /* The maximum invariant id. */
321 /* Whether to consider just related and important candidates when replacing a
323 bool consider_all_candidates
;
325 /* Are we optimizing for speed? */
328 /* Whether the loop body includes any function calls. */
329 bool body_includes_call
;
331 /* Whether the loop body can only be exited via single exit. */
332 bool loop_single_exit_p
;
335 /* An assignment of iv candidates to uses. */
339 /* The number of uses covered by the assignment. */
342 /* Number of uses that cannot be expressed by the candidates in the set. */
345 /* Candidate assigned to a use, together with the related costs. */
346 struct cost_pair
**cand_for_use
;
348 /* Number of times each candidate is used. */
349 unsigned *n_cand_uses
;
351 /* The candidates used. */
354 /* The number of candidates in the set. */
357 /* Total number of registers needed. */
360 /* Total cost of expressing uses. */
361 comp_cost cand_use_cost
;
363 /* Total cost of candidates. */
366 /* Number of times each invariant is used. */
367 unsigned *n_invariant_uses
;
369 /* The array holding the number of uses of each loop
370 invariant expressions created by ivopt. */
371 unsigned *used_inv_expr
;
373 /* The number of created loop invariants. */
374 unsigned num_used_inv_expr
;
376 /* Total cost of the assignment. */
380 /* Difference of two iv candidate assignments. */
387 /* An old assignment (for rollback purposes). */
388 struct cost_pair
*old_cp
;
390 /* A new assignment. */
391 struct cost_pair
*new_cp
;
393 /* Next change in the list. */
394 struct iv_ca_delta
*next_change
;
397 /* Bound on number of candidates below that all candidates are considered. */
399 #define CONSIDER_ALL_CANDIDATES_BOUND \
400 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
402 /* If there are more iv occurrences, we just give up (it is quite unlikely that
403 optimizing such a loop would help, and it would take ages). */
405 #define MAX_CONSIDERED_USES \
406 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
408 /* If there are at most this number of ivs in the set, try removing unnecessary
409 ivs from the set always. */
411 #define ALWAYS_PRUNE_CAND_SET_BOUND \
412 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
414 /* The list of trees for that the decl_rtl field must be reset is stored
417 static vec
<tree
> decl_rtl_to_reset
;
419 static comp_cost
force_expr_to_var_cost (tree
, bool);
421 /* Number of uses recorded in DATA. */
423 static inline unsigned
424 n_iv_uses (struct ivopts_data
*data
)
426 return data
->iv_uses
.length ();
429 /* Ith use recorded in DATA. */
431 static inline struct iv_use
*
432 iv_use (struct ivopts_data
*data
, unsigned i
)
434 return data
->iv_uses
[i
];
437 /* Number of candidates recorded in DATA. */
439 static inline unsigned
440 n_iv_cands (struct ivopts_data
*data
)
442 return data
->iv_candidates
.length ();
445 /* Ith candidate recorded in DATA. */
447 static inline struct iv_cand
*
448 iv_cand (struct ivopts_data
*data
, unsigned i
)
450 return data
->iv_candidates
[i
];
453 /* The single loop exit if it dominates the latch, NULL otherwise. */
456 single_dom_exit (struct loop
*loop
)
458 edge exit
= single_exit (loop
);
463 if (!just_once_each_iteration_p (loop
, exit
->src
))
469 /* Dumps information about the induction variable IV to FILE. */
472 dump_iv (FILE *file
, struct iv
*iv
)
476 fprintf (file
, "ssa name ");
477 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
478 fprintf (file
, "\n");
481 fprintf (file
, " type ");
482 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
483 fprintf (file
, "\n");
487 fprintf (file
, " base ");
488 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
489 fprintf (file
, "\n");
491 fprintf (file
, " step ");
492 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
493 fprintf (file
, "\n");
497 fprintf (file
, " invariant ");
498 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
499 fprintf (file
, "\n");
504 fprintf (file
, " base object ");
505 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
506 fprintf (file
, "\n");
510 fprintf (file
, " is a biv\n");
513 /* Dumps information about the USE to FILE. */
516 dump_use (FILE *file
, struct iv_use
*use
)
518 fprintf (file
, "use %d\n", use
->id
);
522 case USE_NONLINEAR_EXPR
:
523 fprintf (file
, " generic\n");
527 fprintf (file
, " address\n");
531 fprintf (file
, " compare\n");
538 fprintf (file
, " in statement ");
539 print_gimple_stmt (file
, use
->stmt
, 0, 0);
540 fprintf (file
, "\n");
542 fprintf (file
, " at position ");
544 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
545 fprintf (file
, "\n");
547 dump_iv (file
, use
->iv
);
549 if (use
->related_cands
)
551 fprintf (file
, " related candidates ");
552 dump_bitmap (file
, use
->related_cands
);
556 /* Dumps information about the uses to FILE. */
559 dump_uses (FILE *file
, struct ivopts_data
*data
)
564 for (i
= 0; i
< n_iv_uses (data
); i
++)
566 use
= iv_use (data
, i
);
568 dump_use (file
, use
);
569 fprintf (file
, "\n");
573 /* Dumps information about induction variable candidate CAND to FILE. */
576 dump_cand (FILE *file
, struct iv_cand
*cand
)
578 struct iv
*iv
= cand
->iv
;
580 fprintf (file
, "candidate %d%s\n",
581 cand
->id
, cand
->important
? " (important)" : "");
583 if (cand
->depends_on
)
585 fprintf (file
, " depends on ");
586 dump_bitmap (file
, cand
->depends_on
);
591 fprintf (file
, " final value replacement\n");
595 if (cand
->var_before
)
597 fprintf (file
, " var_before ");
598 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
599 fprintf (file
, "\n");
603 fprintf (file
, " var_after ");
604 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
605 fprintf (file
, "\n");
611 fprintf (file
, " incremented before exit test\n");
615 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
619 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
623 fprintf (file
, " incremented at end\n");
627 fprintf (file
, " original biv\n");
634 /* Returns the info for ssa version VER. */
636 static inline struct version_info
*
637 ver_info (struct ivopts_data
*data
, unsigned ver
)
639 return data
->version_info
+ ver
;
642 /* Returns the info for ssa name NAME. */
644 static inline struct version_info
*
645 name_info (struct ivopts_data
*data
, tree name
)
647 return ver_info (data
, SSA_NAME_VERSION (name
));
650 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
654 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
656 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
660 if (sbb
== loop
->latch
)
666 return stmt
== last_stmt (bb
);
669 /* Returns true if STMT if after the place where the original induction
670 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
671 if the positions are identical. */
674 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
676 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
677 basic_block stmt_bb
= gimple_bb (stmt
);
679 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
682 if (stmt_bb
!= cand_bb
)
686 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
688 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
691 /* Returns true if STMT if after the place where the induction variable
692 CAND is incremented in LOOP. */
695 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
703 return stmt_after_ip_normal_pos (loop
, stmt
);
707 return stmt_after_inc_pos (cand
, stmt
, false);
710 return stmt_after_inc_pos (cand
, stmt
, true);
717 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
720 abnormal_ssa_name_p (tree exp
)
725 if (TREE_CODE (exp
) != SSA_NAME
)
728 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
731 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
732 abnormal phi node. Callback for for_each_index. */
735 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
736 void *data ATTRIBUTE_UNUSED
)
738 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
740 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
742 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
746 return !abnormal_ssa_name_p (*index
);
749 /* Returns true if EXPR contains a ssa name that occurs in an
750 abnormal phi node. */
753 contains_abnormal_ssa_name_p (tree expr
)
756 enum tree_code_class codeclass
;
761 code
= TREE_CODE (expr
);
762 codeclass
= TREE_CODE_CLASS (code
);
764 if (code
== SSA_NAME
)
765 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
767 if (code
== INTEGER_CST
768 || is_gimple_min_invariant (expr
))
771 if (code
== ADDR_EXPR
)
772 return !for_each_index (&TREE_OPERAND (expr
, 0),
773 idx_contains_abnormal_ssa_name_p
,
776 if (code
== COND_EXPR
)
777 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
778 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
779 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
785 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
790 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
802 /* Returns the structure describing number of iterations determined from
803 EXIT of DATA->current_loop, or NULL if something goes wrong. */
805 static struct tree_niter_desc
*
806 niter_for_exit (struct ivopts_data
*data
, edge exit
)
808 struct tree_niter_desc
*desc
;
813 data
->niters
= pointer_map_create ();
817 slot
= pointer_map_contains (data
->niters
, exit
);
821 /* Try to determine number of iterations. We cannot safely work with ssa
822 names that appear in phi nodes on abnormal edges, so that we do not
823 create overlapping life ranges for them (PR 27283). */
824 desc
= XNEW (struct tree_niter_desc
);
825 if (!number_of_iterations_exit (data
->current_loop
,
827 || contains_abnormal_ssa_name_p (desc
->niter
))
832 slot
= pointer_map_insert (data
->niters
, exit
);
836 desc
= (struct tree_niter_desc
*) *slot
;
841 /* Returns the structure describing number of iterations determined from
842 single dominating exit of DATA->current_loop, or NULL if something
845 static struct tree_niter_desc
*
846 niter_for_single_dom_exit (struct ivopts_data
*data
)
848 edge exit
= single_dom_exit (data
->current_loop
);
853 return niter_for_exit (data
, exit
);
856 /* Initializes data structures used by the iv optimization pass, stored
860 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
862 data
->version_info_size
= 2 * num_ssa_names
;
863 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
864 data
->relevant
= BITMAP_ALLOC (NULL
);
865 data
->important_candidates
= BITMAP_ALLOC (NULL
);
866 data
->max_inv_id
= 0;
868 data
->iv_uses
.create (20);
869 data
->iv_candidates
.create (20);
870 data
->inv_expr_tab
.create (10);
871 data
->inv_expr_id
= 0;
872 decl_rtl_to_reset
.create (20);
875 /* Returns a memory object to that EXPR points. In case we are able to
876 determine that it does not point to any such object, NULL is returned. */
879 determine_base_object (tree expr
)
881 enum tree_code code
= TREE_CODE (expr
);
884 /* If this is a pointer casted to any type, we need to determine
885 the base object for the pointer; so handle conversions before
886 throwing away non-pointer expressions. */
887 if (CONVERT_EXPR_P (expr
))
888 return determine_base_object (TREE_OPERAND (expr
, 0));
890 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
899 obj
= TREE_OPERAND (expr
, 0);
900 base
= get_base_address (obj
);
905 if (TREE_CODE (base
) == MEM_REF
)
906 return determine_base_object (TREE_OPERAND (base
, 0));
908 return fold_convert (ptr_type_node
,
909 build_fold_addr_expr (base
));
911 case POINTER_PLUS_EXPR
:
912 return determine_base_object (TREE_OPERAND (expr
, 0));
916 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
920 return fold_convert (ptr_type_node
, expr
);
924 /* Allocates an induction variable with given initial value BASE and step STEP
928 alloc_iv (tree base
, tree step
)
930 tree base_object
= base
;
931 struct iv
*iv
= XCNEW (struct iv
);
932 gcc_assert (step
!= NULL_TREE
);
934 /* Lower all address expressions except ones with DECL_P as operand.
936 1) More accurate cost can be computed for address expressions;
937 2) Duplicate candidates won't be created for bases in different
938 forms, like &a[0] and &a. */
939 STRIP_NOPS (base_object
);
940 if (TREE_CODE (base_object
) == ADDR_EXPR
941 && !DECL_P (TREE_OPERAND (base_object
, 0)))
945 base_object
= get_inner_reference_aff (TREE_OPERAND (base_object
, 0),
947 gcc_assert (base_object
!= NULL_TREE
);
948 base_object
= build_fold_addr_expr (base_object
);
949 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
953 iv
->base_object
= determine_base_object (base_object
);
956 iv
->have_use_for
= false;
958 iv
->ssa_name
= NULL_TREE
;
963 /* Sets STEP and BASE for induction variable IV. */
966 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
968 struct version_info
*info
= name_info (data
, iv
);
970 gcc_assert (!info
->iv
);
972 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
973 info
->iv
= alloc_iv (base
, step
);
974 info
->iv
->ssa_name
= iv
;
977 /* Finds induction variable declaration for VAR. */
980 get_iv (struct ivopts_data
*data
, tree var
)
983 tree type
= TREE_TYPE (var
);
985 if (!POINTER_TYPE_P (type
)
986 && !INTEGRAL_TYPE_P (type
))
989 if (!name_info (data
, var
)->iv
)
991 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
994 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
995 set_iv (data
, var
, var
, build_int_cst (type
, 0));
998 return name_info (data
, var
)->iv
;
1001 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1002 not define a simple affine biv with nonzero step. */
1005 determine_biv_step (gimple phi
)
1007 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1008 tree name
= PHI_RESULT (phi
);
1011 if (virtual_operand_p (name
))
1014 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1017 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1020 /* Finds basic ivs. */
1023 find_bivs (struct ivopts_data
*data
)
1026 tree step
, type
, base
;
1028 struct loop
*loop
= data
->current_loop
;
1029 gimple_stmt_iterator psi
;
1031 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1033 phi
= gsi_stmt (psi
);
1035 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1038 step
= determine_biv_step (phi
);
1042 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1043 base
= expand_simple_operations (base
);
1044 if (contains_abnormal_ssa_name_p (base
)
1045 || contains_abnormal_ssa_name_p (step
))
1048 type
= TREE_TYPE (PHI_RESULT (phi
));
1049 base
= fold_convert (type
, base
);
1052 if (POINTER_TYPE_P (type
))
1053 step
= convert_to_ptrofftype (step
);
1055 step
= fold_convert (type
, step
);
1058 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1065 /* Marks basic ivs. */
1068 mark_bivs (struct ivopts_data
*data
)
1072 struct iv
*iv
, *incr_iv
;
1073 struct loop
*loop
= data
->current_loop
;
1074 basic_block incr_bb
;
1075 gimple_stmt_iterator psi
;
1077 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1079 phi
= gsi_stmt (psi
);
1081 iv
= get_iv (data
, PHI_RESULT (phi
));
1085 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1086 incr_iv
= get_iv (data
, var
);
1090 /* If the increment is in the subloop, ignore it. */
1091 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1092 if (incr_bb
->loop_father
!= data
->current_loop
1093 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1097 incr_iv
->biv_p
= true;
1101 /* Checks whether STMT defines a linear induction variable and stores its
1102 parameters to IV. */
1105 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1108 struct loop
*loop
= data
->current_loop
;
1110 iv
->base
= NULL_TREE
;
1111 iv
->step
= NULL_TREE
;
1113 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1116 lhs
= gimple_assign_lhs (stmt
);
1117 if (TREE_CODE (lhs
) != SSA_NAME
)
1120 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1122 iv
->base
= expand_simple_operations (iv
->base
);
1124 if (contains_abnormal_ssa_name_p (iv
->base
)
1125 || contains_abnormal_ssa_name_p (iv
->step
))
1128 /* If STMT could throw, then do not consider STMT as defining a GIV.
1129 While this will suppress optimizations, we can not safely delete this
1130 GIV and associated statements, even if it appears it is not used. */
1131 if (stmt_could_throw_p (stmt
))
1137 /* Finds general ivs in statement STMT. */
1140 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1144 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1147 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1150 /* Finds general ivs in basic block BB. */
1153 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1155 gimple_stmt_iterator bsi
;
1157 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1158 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1161 /* Finds general ivs. */
1164 find_givs (struct ivopts_data
*data
)
1166 struct loop
*loop
= data
->current_loop
;
1167 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1170 for (i
= 0; i
< loop
->num_nodes
; i
++)
1171 find_givs_in_bb (data
, body
[i
]);
1175 /* For each ssa name defined in LOOP determines whether it is an induction
1176 variable and if so, its initial value and step. */
1179 find_induction_variables (struct ivopts_data
*data
)
1184 if (!find_bivs (data
))
1190 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1192 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1196 fprintf (dump_file
, " number of iterations ");
1197 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1198 if (!integer_zerop (niter
->may_be_zero
))
1200 fprintf (dump_file
, "; zero if ");
1201 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1203 fprintf (dump_file
, "\n\n");
1206 fprintf (dump_file
, "Induction variables:\n\n");
1208 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1210 if (ver_info (data
, i
)->iv
)
1211 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1218 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1220 static struct iv_use
*
1221 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1222 gimple stmt
, enum use_type use_type
)
1224 struct iv_use
*use
= XCNEW (struct iv_use
);
1226 use
->id
= n_iv_uses (data
);
1227 use
->type
= use_type
;
1231 use
->related_cands
= BITMAP_ALLOC (NULL
);
1233 /* To avoid showing ssa name in the dumps, if it was not reset by the
1235 iv
->ssa_name
= NULL_TREE
;
1237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1238 dump_use (dump_file
, use
);
1240 data
->iv_uses
.safe_push (use
);
1245 /* Checks whether OP is a loop-level invariant and if so, records it.
1246 NONLINEAR_USE is true if the invariant is used in a way we do not
1247 handle specially. */
1250 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1253 struct version_info
*info
;
1255 if (TREE_CODE (op
) != SSA_NAME
1256 || virtual_operand_p (op
))
1259 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1261 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1264 info
= name_info (data
, op
);
1266 info
->has_nonlin_use
|= nonlinear_use
;
1268 info
->inv_id
= ++data
->max_inv_id
;
1269 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1272 /* Checks whether the use OP is interesting and if so, records it. */
1274 static struct iv_use
*
1275 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1282 if (TREE_CODE (op
) != SSA_NAME
)
1285 iv
= get_iv (data
, op
);
1289 if (iv
->have_use_for
)
1291 use
= iv_use (data
, iv
->use_id
);
1293 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1297 if (integer_zerop (iv
->step
))
1299 record_invariant (data
, op
, true);
1302 iv
->have_use_for
= true;
1304 civ
= XNEW (struct iv
);
1307 stmt
= SSA_NAME_DEF_STMT (op
);
1308 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1309 || is_gimple_assign (stmt
));
1311 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1312 iv
->use_id
= use
->id
;
1317 /* Given a condition in statement STMT, checks whether it is a compare
1318 of an induction variable and an invariant. If this is the case,
1319 CONTROL_VAR is set to location of the iv, BOUND to the location of
1320 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1321 induction variable descriptions, and true is returned. If this is not
1322 the case, CONTROL_VAR and BOUND are set to the arguments of the
1323 condition and false is returned. */
1326 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1327 tree
**control_var
, tree
**bound
,
1328 struct iv
**iv_var
, struct iv
**iv_bound
)
1330 /* The objects returned when COND has constant operands. */
1331 static struct iv const_iv
;
1333 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1334 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1337 if (gimple_code (stmt
) == GIMPLE_COND
)
1339 op0
= gimple_cond_lhs_ptr (stmt
);
1340 op1
= gimple_cond_rhs_ptr (stmt
);
1344 op0
= gimple_assign_rhs1_ptr (stmt
);
1345 op1
= gimple_assign_rhs2_ptr (stmt
);
1348 zero
= integer_zero_node
;
1349 const_iv
.step
= integer_zero_node
;
1351 if (TREE_CODE (*op0
) == SSA_NAME
)
1352 iv0
= get_iv (data
, *op0
);
1353 if (TREE_CODE (*op1
) == SSA_NAME
)
1354 iv1
= get_iv (data
, *op1
);
1356 /* Exactly one of the compared values must be an iv, and the other one must
1361 if (integer_zerop (iv0
->step
))
1363 /* Control variable may be on the other side. */
1364 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1365 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1367 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1371 *control_var
= op0
;;
1382 /* Checks whether the condition in STMT is interesting and if so,
1386 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1388 tree
*var_p
, *bound_p
;
1389 struct iv
*var_iv
, *civ
;
1391 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1393 find_interesting_uses_op (data
, *var_p
);
1394 find_interesting_uses_op (data
, *bound_p
);
1398 civ
= XNEW (struct iv
);
1400 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1403 /* Returns the outermost loop EXPR is obviously invariant in
1404 relative to the loop LOOP, i.e. if all its operands are defined
1405 outside of the returned loop. Returns NULL if EXPR is not
1406 even obviously invariant in LOOP. */
/* See the header comment above: returns the outermost loop (relative
   to LOOP) in which EXPR is obviously invariant, or NULL when it is
   not even invariant in LOOP.
   NOTE(review): extraction residue -- interior lines (braces, a
   `return NULL' for a failed recursive call, the MAX_STMT handling
   around original lines 1429-1433) were dropped.  Comments only.  */
1409 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
/* Constants and other minimal invariants are invariant everywhere:
   return the root of the loop tree.  */
1414 if (is_gimple_min_invariant (expr
))
1415 return current_loops
->tree_root
;
/* An SSA name is invariant exactly outside the loop of its defining
   block.  */
1417 if (TREE_CODE (expr
) == SSA_NAME
)
1419 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
))
;
1422 if (flow_bb_inside_loop_p (loop
, def_bb
))
1424 return superloop_at_depth (loop
,
1425 loop_depth (def_bb
->loop_father
) + 1);
1428 return current_loops
->tree_root
;
/* Otherwise recurse into the operands and take the deepest loop in
   which every present operand is still invariant.  */
1434 unsigned maxdepth
= 0;
1435 len
= TREE_OPERAND_LENGTH (expr
);
1436 for (i
= 0; i
< len
; i
++)
1438 struct loop
*ivloop
;
1439 if (!TREE_OPERAND (expr
, i
))
1442 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1445 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1448 return superloop_at_depth (loop
, maxdepth
);
1451 /* Returns true if expression EXPR is obviously invariant in LOOP,
1452 i.e. if all its operands are defined outside of the LOOP. LOOP
1453 should not be the function body. */
/* See the header comment above: true iff all of EXPR's operands are
   defined outside LOOP.
   NOTE(review): extraction residue -- interior lines (returns, the
   `if (def_bb' prefix of the condition at line 1470) were dropped.
   Comments only; tokens unchanged.  */
1456 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
/* LOOP must not be the function body (depth 0), per the header
   comment.  */
1461 gcc_assert (loop_depth (loop
) > 0);
/* Minimal invariants (constants etc.) are invariant in any loop.  */
1463 if (is_gimple_min_invariant (expr
))
/* An SSA name is invariant iff its defining block lies outside LOOP.  */
1466 if (TREE_CODE (expr
) == SSA_NAME
)
1468 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1470 && flow_bb_inside_loop_p (loop
, def_bb
))
/* Otherwise EXPR is invariant iff every present operand is,
   recursively.  */
1479 len
= TREE_OPERAND_LENGTH (expr
);
1480 for (i
= 0; i
< len
; i
++)
1481 if (TREE_OPERAND (expr
, i
)
1482 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1488 /* Cumulates the steps of indices into DATA and replaces their values with the
1489 initial ones. Returns false when the value of the index cannot be determined.
1490 Callback for for_each_index. */
/* Payload threaded through for_each_index to the idx_find_step
   callback below.
   NOTE(review): extraction residue -- the struct's opening brace and
   further members are missing from this chunk; idx_find_step also
   dereferences dta->stmt and dta->step, so those members exist in the
   full file.  */
1492 struct ifs_ivopts_data
/* Back-pointer to the pass-wide ivopts data.  */
1494 struct ivopts_data
*ivopts_data
;
1500 idx_find_step (tree base
, tree
*idx
, void *data
)
1502 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1504 tree step
, iv_base
, iv_step
, lbound
, off
;
1505 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1507 /* If base is a component ref, require that the offset of the reference
1509 if (TREE_CODE (base
) == COMPONENT_REF
)
1511 off
= component_ref_field_offset (base
);
1512 return expr_invariant_in_loop_p (loop
, off
);
1515 /* If base is array, first check whether we will be able to move the
1516 reference out of the loop (in order to take its address in strength
1517 reduction). In order for this to work we need both lower bound
1518 and step to be loop invariants. */
1519 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1521 /* Moreover, for a range, the size needs to be invariant as well. */
1522 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1523 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1526 step
= array_ref_element_size (base
);
1527 lbound
= array_ref_low_bound (base
);
1529 if (!expr_invariant_in_loop_p (loop
, step
)
1530 || !expr_invariant_in_loop_p (loop
, lbound
))
1534 if (TREE_CODE (*idx
) != SSA_NAME
)
1537 iv
= get_iv (dta
->ivopts_data
, *idx
);
1541 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1542 *&x[0], which is not folded and does not trigger the
1543 ARRAY_REF path below. */
1546 if (integer_zerop (iv
->step
))
1549 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1551 step
= array_ref_element_size (base
);
1553 /* We only handle addresses whose step is an integer constant. */
1554 if (TREE_CODE (step
) != INTEGER_CST
)
1558 /* The step for pointer arithmetics already is 1 byte. */
1559 step
= size_one_node
;
1563 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1564 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1567 /* The index might wrap. */
1571 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1572 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1577 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1578 object is passed to it in DATA. */
/* for_each_index callback (see header comment above): records *IDX as
   an interesting operand use; for array references also records the
   element size and the low bound, both of which may contain SSA names.
   NOTE(review): extraction residue -- the second parameter line and
   the final `return true;' were dropped; tokens unchanged.  */
1581 idx_record_use (tree base
, tree
*idx
,
/* VDATA is the pass-wide ivopts_data, passed as void * by
   for_each_index.  */
1584 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1585 find_interesting_uses_op (data
, *idx
);
/* Element size and low bound of an array reference can themselves be
   interesting (non-constant) operands.  */
1586 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1588 find_interesting_uses_op (data
, array_ref_element_size (base
));
1589 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1594 /* If we can prove that TOP = cst * BOT for some constant cst,
1595 store cst to MUL and return true. Otherwise return false.
1596 The returned value is always sign-extended, regardless of the
1597 signedness of TOP and BOT. */
1600 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1603 enum tree_code code
;
1604 double_int res
, p0
, p1
;
1605 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1610 if (operand_equal_p (top
, bot
, 0))
1612 *mul
= double_int_one
;
1616 code
= TREE_CODE (top
);
1620 mby
= TREE_OPERAND (top
, 1);
1621 if (TREE_CODE (mby
) != INTEGER_CST
)
1624 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1627 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1632 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1633 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1636 if (code
== MINUS_EXPR
)
1638 *mul
= (p0
+ p1
).sext (precision
);
1642 if (TREE_CODE (bot
) != INTEGER_CST
)
1645 p0
= tree_to_double_int (top
).sext (precision
);
1646 p1
= tree_to_double_int (bot
).sext (precision
);
1649 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1650 return res
.is_zero ();
1657 /* Returns true if memory reference REF with step STEP may be unaligned. */
1660 may_be_unaligned_p (tree ref
, tree step
)
1664 HOST_WIDE_INT bitsize
;
1665 HOST_WIDE_INT bitpos
;
1667 enum machine_mode mode
;
1668 int unsignedp
, volatilep
;
1669 unsigned base_align
;
1671 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1672 thus they are not misaligned. */
1673 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1676 /* The test below is basically copy of what expr.c:normal_inner_ref
1677 does to check whether the object must be loaded by parts when
1678 STRICT_ALIGNMENT is true. */
1679 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1680 &unsignedp
, &volatilep
, true);
1681 base_type
= TREE_TYPE (base
);
1682 base_align
= get_object_alignment (base
);
1683 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1685 if (mode
!= BLKmode
)
1687 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1689 if (base_align
< mode_align
1690 || (bitpos
% mode_align
) != 0
1691 || (bitpos
% BITS_PER_UNIT
) != 0)
1695 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1698 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1705 /* Return true if EXPR may be non-addressable. */
/* See header comment above: decides whether EXPR may denote a
   non-addressable object, recursing through component references.
   NOTE(review): extraction residue -- several case labels and returns
   were dropped (the `return DECL_NONADDRESSABLE_P' at line 1718
   belongs to a dropped COMPONENT_REF case label; the switch's default
   and closing lines are missing).  Comments only.  */
1708 may_be_nonaddressable_p (tree expr
)
1710 switch (TREE_CODE (expr
))
1712 case TARGET_MEM_REF
:
1713 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1714 target, thus they are always addressable. */
/* (Dropped COMPONENT_REF case label here:) the field itself may be
   marked non-addressable, and the containing object must be checked
   recursively.  */
1718 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1719 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1721 case VIEW_CONVERT_EXPR
:
1722 /* This kind of view-conversions may wrap non-addressable objects
1723 and make them look addressable. After some processing the
1724 non-addressability may be uncovered again, causing ADDR_EXPRs
1725 of inappropriate objects to be built. */
1726 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1727 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1730 /* ... fall through ... */
1733 case ARRAY_RANGE_REF
:
1734 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1746 /* Finds addresses in *OP_P inside STMT. */
1749 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1751 tree base
= *op_p
, step
= size_zero_node
;
1753 struct ifs_ivopts_data ifs_ivopts_data
;
1755 /* Do not play with volatile memory references. A bit too conservative,
1756 perhaps, but safe. */
1757 if (gimple_has_volatile_ops (stmt
))
1760 /* Ignore bitfields for now. Not really something terribly complicated
1762 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1765 base
= unshare_expr (base
);
1767 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1769 tree type
= build_pointer_type (TREE_TYPE (base
));
1773 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1775 civ
= get_iv (data
, TMR_BASE (base
));
1779 TMR_BASE (base
) = civ
->base
;
1782 if (TMR_INDEX2 (base
)
1783 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1785 civ
= get_iv (data
, TMR_INDEX2 (base
));
1789 TMR_INDEX2 (base
) = civ
->base
;
1792 if (TMR_INDEX (base
)
1793 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1795 civ
= get_iv (data
, TMR_INDEX (base
));
1799 TMR_INDEX (base
) = civ
->base
;
1804 if (TMR_STEP (base
))
1805 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1807 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1811 if (integer_zerop (step
))
1813 base
= tree_mem_ref_addr (type
, base
);
1817 ifs_ivopts_data
.ivopts_data
= data
;
1818 ifs_ivopts_data
.stmt
= stmt
;
1819 ifs_ivopts_data
.step
= size_zero_node
;
1820 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1821 || integer_zerop (ifs_ivopts_data
.step
))
1823 step
= ifs_ivopts_data
.step
;
1825 /* Check that the base expression is addressable. This needs
1826 to be done after substituting bases of IVs into it. */
1827 if (may_be_nonaddressable_p (base
))
1830 /* Moreover, on strict alignment platforms, check that it is
1831 sufficiently aligned. */
1832 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1835 base
= build_fold_addr_expr (base
);
1837 /* Substituting bases of IVs into the base expression might
1838 have caused folding opportunities. */
1839 if (TREE_CODE (base
) == ADDR_EXPR
)
1841 tree
*ref
= &TREE_OPERAND (base
, 0);
1842 while (handled_component_p (*ref
))
1843 ref
= &TREE_OPERAND (*ref
, 0);
1844 if (TREE_CODE (*ref
) == MEM_REF
)
1846 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1847 TREE_OPERAND (*ref
, 0),
1848 TREE_OPERAND (*ref
, 1));
1855 civ
= alloc_iv (base
, step
);
1856 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1860 for_each_index (op_p
, idx_record_use
, data
);
1863 /* Finds and records invariants used in STMT. */
/* See header comment above: records every SSA operand of STMT (plain
   statement or PHI) as an invariant via record_invariant.
   NOTE(review): extraction residue -- declarations of OP and ITER and
   the braces were dropped; tokens unchanged.  */
1866 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1869 use_operand_p use_p
;
/* Iterate over all SSA uses, whether STMT is a PHI or an ordinary
   statement.  */
1872 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1874 op
= USE_FROM_PTR (use_p
);
1875 record_invariant (data
, op
, false);
1879 /* Finds interesting uses of induction variables in the statement STMT. */
1882 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1885 tree op
, *lhs
, *rhs
;
1887 use_operand_p use_p
;
1888 enum tree_code code
;
1890 find_invariants_stmt (data
, stmt
);
1892 if (gimple_code (stmt
) == GIMPLE_COND
)
1894 find_interesting_uses_cond (data
, stmt
);
1898 if (is_gimple_assign (stmt
))
1900 lhs
= gimple_assign_lhs_ptr (stmt
);
1901 rhs
= gimple_assign_rhs1_ptr (stmt
);
1903 if (TREE_CODE (*lhs
) == SSA_NAME
)
1905 /* If the statement defines an induction variable, the uses are not
1906 interesting by themselves. */
1908 iv
= get_iv (data
, *lhs
);
1910 if (iv
&& !integer_zerop (iv
->step
))
1914 code
= gimple_assign_rhs_code (stmt
);
1915 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1916 && (REFERENCE_CLASS_P (*rhs
)
1917 || is_gimple_val (*rhs
)))
1919 if (REFERENCE_CLASS_P (*rhs
))
1920 find_interesting_uses_address (data
, stmt
, rhs
);
1922 find_interesting_uses_op (data
, *rhs
);
1924 if (REFERENCE_CLASS_P (*lhs
))
1925 find_interesting_uses_address (data
, stmt
, lhs
);
1928 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1930 find_interesting_uses_cond (data
, stmt
);
1934 /* TODO -- we should also handle address uses of type
1936 memory = call (whatever);
1943 if (gimple_code (stmt
) == GIMPLE_PHI
1944 && gimple_bb (stmt
) == data
->current_loop
->header
)
1946 iv
= get_iv (data
, PHI_RESULT (stmt
));
1948 if (iv
&& !integer_zerop (iv
->step
))
1952 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1954 op
= USE_FROM_PTR (use_p
);
1956 if (TREE_CODE (op
) != SSA_NAME
)
1959 iv
= get_iv (data
, op
);
1963 find_interesting_uses_op (data
, op
);
1967 /* Finds interesting uses of induction variables outside of loops
1968 on loop exit edge EXIT. */
/* See header comment above: records uses of ivs that are live across
   loop exit edge EXIT.
   NOTE(review): extraction residue -- declarations of PHI and DEF and
   the braces were dropped; tokens unchanged.  */
1971 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1974 gimple_stmt_iterator psi
;
/* Each PHI node in the exit's destination merges a value flowing out
   of the loop along EXIT; that argument is a use of the iv outside the
   loop.  Virtual operands are ignored.  */
1977 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1979 phi
= gsi_stmt (psi
);
1980 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1981 if (!virtual_operand_p (def
))
1982 find_interesting_uses_op (data
, def
);
1986 /* Finds uses of the induction variables that are interesting. */
1989 find_interesting_uses (struct ivopts_data
*data
)
1992 gimple_stmt_iterator bsi
;
1993 basic_block
*body
= get_loop_body (data
->current_loop
);
1995 struct version_info
*info
;
1998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1999 fprintf (dump_file
, "Uses:\n\n");
2001 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2006 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2007 if (e
->dest
!= EXIT_BLOCK_PTR
2008 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2009 find_interesting_uses_outside (data
, e
);
2011 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2012 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2013 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2014 if (!is_gimple_debug (gsi_stmt (bsi
)))
2015 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2022 fprintf (dump_file
, "\n");
2024 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2026 info
= ver_info (data
, i
);
2029 fprintf (dump_file
, " ");
2030 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2031 fprintf (dump_file
, " is invariant (%d)%s\n",
2032 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2036 fprintf (dump_file
, "\n");
2042 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2043 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2044 we are at the top-level of the processed address. */
2047 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2048 HOST_WIDE_INT
*offset
)
2050 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2051 enum tree_code code
;
2052 tree type
, orig_type
= TREE_TYPE (expr
);
2053 HOST_WIDE_INT off0
, off1
, st
;
2054 tree orig_expr
= expr
;
2058 type
= TREE_TYPE (expr
);
2059 code
= TREE_CODE (expr
);
2065 if (!cst_and_fits_in_hwi (expr
)
2066 || integer_zerop (expr
))
2069 *offset
= int_cst_value (expr
);
2070 return build_int_cst (orig_type
, 0);
2072 case POINTER_PLUS_EXPR
:
2075 op0
= TREE_OPERAND (expr
, 0);
2076 op1
= TREE_OPERAND (expr
, 1);
2078 op0
= strip_offset_1 (op0
, false, false, &off0
);
2079 op1
= strip_offset_1 (op1
, false, false, &off1
);
2081 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2082 if (op0
== TREE_OPERAND (expr
, 0)
2083 && op1
== TREE_OPERAND (expr
, 1))
2086 if (integer_zerop (op1
))
2088 else if (integer_zerop (op0
))
2090 if (code
== MINUS_EXPR
)
2091 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2096 expr
= fold_build2 (code
, type
, op0
, op1
);
2098 return fold_convert (orig_type
, expr
);
2101 op1
= TREE_OPERAND (expr
, 1);
2102 if (!cst_and_fits_in_hwi (op1
))
2105 op0
= TREE_OPERAND (expr
, 0);
2106 op0
= strip_offset_1 (op0
, false, false, &off0
);
2107 if (op0
== TREE_OPERAND (expr
, 0))
2110 *offset
= off0
* int_cst_value (op1
);
2111 if (integer_zerop (op0
))
2114 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2116 return fold_convert (orig_type
, expr
);
2119 case ARRAY_RANGE_REF
:
2123 step
= array_ref_element_size (expr
);
2124 if (!cst_and_fits_in_hwi (step
))
2127 st
= int_cst_value (step
);
2128 op1
= TREE_OPERAND (expr
, 1);
2129 op1
= strip_offset_1 (op1
, false, false, &off1
);
2130 *offset
= off1
* st
;
2133 && integer_zerop (op1
))
2135 /* Strip the component reference completely. */
2136 op0
= TREE_OPERAND (expr
, 0);
2137 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2150 tmp
= component_ref_field_offset (expr
);
2151 field
= TREE_OPERAND (expr
, 1);
2153 && cst_and_fits_in_hwi (tmp
)
2154 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2156 HOST_WIDE_INT boffset
, abs_off
;
2158 /* Strip the component reference completely. */
2159 op0
= TREE_OPERAND (expr
, 0);
2160 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2161 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2162 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2166 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2173 op0
= TREE_OPERAND (expr
, 0);
2174 op0
= strip_offset_1 (op0
, true, true, &off0
);
2177 if (op0
== TREE_OPERAND (expr
, 0))
2180 expr
= build_fold_addr_expr (op0
);
2181 return fold_convert (orig_type
, expr
);
2184 /* ??? Offset operand? */
2185 inside_addr
= false;
2192 /* Default handling of expressions for that we want to recurse into
2193 the first operand. */
2194 op0
= TREE_OPERAND (expr
, 0);
2195 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2198 if (op0
== TREE_OPERAND (expr
, 0)
2199 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2202 expr
= copy_node (expr
);
2203 TREE_OPERAND (expr
, 0) = op0
;
2205 TREE_OPERAND (expr
, 1) = op1
;
2207 /* Inside address, we might strip the top level component references,
2208 thus changing type of the expression. Handling of ADDR_EXPR
2210 expr
= fold_convert (orig_type
, expr
);
2215 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* See header comment above: thin wrapper around strip_offset_1 that
   strips constant offsets from EXPR into *OFFSET.
   NOTE(review): extraction residue -- the declaration of OFF, the
   store into *OFFSET and the `return core;' were dropped; tokens
   unchanged.  */
2218 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
/* Not inside an address and not at a component-ref top level.  */
2221 tree core
= strip_offset_1 (expr
, false, false, &off
);
2226 /* Returns variant of TYPE that can be used as base for different uses.
2227 We return unsigned type with the same precision, which avoids problems
/* See header comment above: returns an unsigned variant of TYPE with
   the same precision, to be used as a generic candidate base type.
   NOTE(review): extraction residue -- the body of the TYPE_UNSIGNED
   branch (presumably `return type;', line 2237) was dropped; tokens
   unchanged.  */
2231 generic_type_for (tree type
)
/* Pointers are converted to an unsigned integer of equal width.  */
2233 if (POINTER_TYPE_P (type
))
2234 return unsigned_type_for (type
);
2236 if (TYPE_UNSIGNED (type
))
/* Signed types get the corresponding unsigned type.  */
2239 return unsigned_type_for (type
);
2242 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2243 the bitmap to that we should store it. */
/* Pass-wide data for the walk_tree callback below; walk_tree's DATA
   slot is already used for the bitmap, so this is passed via a file
   static (set by the caller before walking, see add_candidate_1).  */
2245 static struct ivopts_data
*fd_ivopts_data
;
/* walk_tree callback (see header comment above): for each SSA name in
   the walked expression that is a recorded invariant, sets its inv_id
   bit in the bitmap pointed to by DATA, allocating the bitmap lazily.
   NOTE(review): extraction residue -- early returns and braces were
   dropped; tokens unchanged.  */
2247 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2249 bitmap
*depends_on
= (bitmap
*) data
;
2250 struct version_info
*info
;
/* Only SSA names can be recorded invariants.  */
2252 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2254 info
= name_info (fd_ivopts_data
, *expr_p
)
;
/* Skip names without an invariant id or with a nonlinear use.  */
2256 if (!info
->inv_id
|| info
->has_nonlin_use
)
/* Lazily allocate the dependence bitmap and record the invariant.  */
2260 *depends_on
= BITMAP_ALLOC (NULL
);
2261 bitmap_set_bit (*depends_on
, info
->inv_id
);
2266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2267 position to POS. If USE is not NULL, the candidate is set as related to
2268 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2269 replacement of the final value of the iv by a direct computation. */
2271 static struct iv_cand
*
2272 add_candidate_1 (struct ivopts_data
*data
,
2273 tree base
, tree step
, bool important
, enum iv_position pos
,
2274 struct iv_use
*use
, gimple incremented_at
)
2277 struct iv_cand
*cand
= NULL
;
2278 tree type
, orig_type
;
2280 /* For non-original variables, make sure their values are computed in a type
2281 that does not invoke undefined behavior on overflows (since in general,
2282 we cannot prove that these induction variables are non-wrapping). */
2283 if (pos
!= IP_ORIGINAL
)
2285 orig_type
= TREE_TYPE (base
);
2286 type
= generic_type_for (orig_type
);
2287 if (type
!= orig_type
)
2289 base
= fold_convert (type
, base
);
2290 step
= fold_convert (type
, step
);
2294 for (i
= 0; i
< n_iv_cands (data
); i
++)
2296 cand
= iv_cand (data
, i
);
2298 if (cand
->pos
!= pos
)
2301 if (cand
->incremented_at
!= incremented_at
2302 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2303 && cand
->ainc_use
!= use
))
2317 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2318 && operand_equal_p (step
, cand
->iv
->step
, 0)
2319 && (TYPE_PRECISION (TREE_TYPE (base
))
2320 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2324 if (i
== n_iv_cands (data
))
2326 cand
= XCNEW (struct iv_cand
);
2332 cand
->iv
= alloc_iv (base
, step
);
2335 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2337 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2338 cand
->var_after
= cand
->var_before
;
2340 cand
->important
= important
;
2341 cand
->incremented_at
= incremented_at
;
2342 data
->iv_candidates
.safe_push (cand
);
2345 && TREE_CODE (step
) != INTEGER_CST
)
2347 fd_ivopts_data
= data
;
2348 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2351 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2352 cand
->ainc_use
= use
;
2354 cand
->ainc_use
= NULL
;
2356 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2357 dump_cand (dump_file
, cand
);
2360 if (important
&& !cand
->important
)
2362 cand
->important
= true;
2363 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2364 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2369 bitmap_set_bit (use
->related_cands
, i
);
2370 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2371 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2378 /* Returns true if incrementing the induction variable at the end of the LOOP
2381 The purpose is to avoid splitting latch edge with a biv increment, thus
2382 creating a jump, possibly confusing other optimization passes and leaving
2383 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2384 is not available (so we do not have a better alternative), or if the latch
2385 edge is already nonempty. */
/* See header comment above: IP_END placement is allowed only when no
   IP_NORMAL position exists, or when the latch-edge block is already
   nonempty.
   NOTE(review): extraction residue -- the `return true;'/`return
   false;' lines and braces were dropped; tokens unchanged.  */
2388 allow_ip_end_pos_p (struct loop
*loop
)
/* No normal position available: IP_END is the only option.  */
2390 if (!ip_normal_pos (loop
))
/* Latch block already nonempty: adding the increment there costs no
   extra jump.  */
2393 if (!empty_block_p (ip_end_pos (loop
)))
2399 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2400 Important field is set to IMPORTANT. */
2403 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2404 bool important
, struct iv_use
*use
)
2406 basic_block use_bb
= gimple_bb (use
->stmt
);
2407 enum machine_mode mem_mode
;
2408 unsigned HOST_WIDE_INT cstepi
;
2410 /* If we insert the increment in any position other than the standard
2411 ones, we must ensure that it is incremented once per iteration.
2412 It must not be in an inner nested loop, or one side of an if
2414 if (use_bb
->loop_father
!= data
->current_loop
2415 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2416 || stmt_could_throw_p (use
->stmt
)
2417 || !cst_and_fits_in_hwi (step
))
2420 cstepi
= int_cst_value (step
);
2422 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2423 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2424 || USE_STORE_PRE_INCREMENT (mem_mode
))
2425 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2426 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2427 || USE_STORE_PRE_DECREMENT (mem_mode
))
2428 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2430 enum tree_code code
= MINUS_EXPR
;
2432 tree new_step
= step
;
2434 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2436 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2437 code
= POINTER_PLUS_EXPR
;
2440 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2441 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2442 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2445 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2446 || USE_STORE_POST_INCREMENT (mem_mode
))
2447 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2448 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2449 || USE_STORE_POST_DECREMENT (mem_mode
))
2450 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2452 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2457 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2458 position to POS. If USE is not NULL, the candidate is set as related to
2459 it. The candidate computation is scheduled on all available positions. */
/* See header comment above: registers candidate BASE + STEP * i at all
   available increment positions -- the normal position, the end
   position (when allowed), and autoincrement positions for address
   uses.
   NOTE(review): extraction residue -- braces and the second signature
   line's joins were dropped; tokens unchanged.  */
2462 add_candidate (struct ivopts_data
*data
,
2463 tree base
, tree step
, bool important
, struct iv_use
*use
)
/* Standard position before the loop exit test.  */
2465 if (ip_normal_pos (data
->current_loop
))
2466 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
/* End-of-latch position, gated by allow_ip_end_pos_p to avoid
   splitting the latch edge needlessly.  */
2467 if (ip_end_pos (data
->current_loop
)
2468 && allow_ip_end_pos_p (data
->current_loop
))
2469 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
/* For address uses also consider pre/post inc/dec placements.  */
2471 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2472 add_autoinc_candidates (data
, base
, step
, important
, use
);
2475 /* Adds standard iv candidates. */
/* See header comment above: adds the generic candidates 0 + 1 * i in
   int, long and long long (the wider types only when strictly more
   precise than the previous one and no wider than a machine word).
   NOTE(review): extraction residue -- the `if (TYPE_PRECISION'
   prefixes of the two guards (lines 2483 and 2490) were dropped;
   tokens unchanged.  */
2478 add_standard_iv_candidates (struct ivopts_data
*data
)
/* Basic int counter 0, 1, 2, ...  */
2480 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2482 /* The same for a double-integer type if it is still fast enough. */
2484 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2485 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2486 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2487 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2489 /* The same for a double-integer type if it is still fast enough. */
2491 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2492 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2493 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2494 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2498 /* Adds candidates bases on the old induction variable IV. */
2501 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2505 struct iv_cand
*cand
;
2507 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2509 /* The same, but with initial value zero. */
2510 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2511 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2513 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2514 iv
->step
, true, NULL
);
2516 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2517 if (gimple_code (phi
) == GIMPLE_PHI
)
2519 /* Additionally record the possibility of leaving the original iv
2521 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2522 cand
= add_candidate_1 (data
,
2523 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2524 SSA_NAME_DEF_STMT (def
));
2525 cand
->var_before
= iv
->ssa_name
;
2526 cand
->var_after
= def
;
2530 /* Adds candidates based on the old induction variables. */
/* See header comment above: walks every relevant SSA version and adds
   candidates for each basic induction variable with a nonzero step.
   NOTE(review): extraction residue -- declarations of I, BI, IV and
   the braces were dropped; tokens unchanged.  */
2533 add_old_ivs_candidates (struct ivopts_data
*data
)
/* Only versions marked relevant by earlier analysis are considered.  */
2539 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2541 iv
= ver_info (data
, i
)->iv
;
/* A biv with zero step is effectively invariant -- skip it.  */
2542 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2543 add_old_iv_candidates (data
, iv
);
2547 /* Adds candidates based on the value of the induction variable IV and USE. */
2550 add_iv_value_candidates (struct ivopts_data
*data
,
2551 struct iv
*iv
, struct iv_use
*use
)
2553 unsigned HOST_WIDE_INT offset
;
2557 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2559 /* The same, but with initial value zero. Make such variable important,
2560 since it is generic enough so that possibly many uses may be based
2562 basetype
= TREE_TYPE (iv
->base
);
2563 if (POINTER_TYPE_P (basetype
))
2564 basetype
= sizetype
;
2565 add_candidate (data
, build_int_cst (basetype
, 0),
2566 iv
->step
, true, use
);
2568 /* Third, try removing the constant offset. Make sure to even
2569 add a candidate for &a[0] vs. (T *)&a. */
2570 base
= strip_offset (iv
->base
, &offset
);
2572 || base
!= iv
->base
)
2573 add_candidate (data
, base
, iv
->step
, false, use
);
2576 /* Adds candidates based on the uses. */
/* See header comment above: adds candidates derived from each recorded
   use.
   NOTE(review): extraction residue -- the switch head on use->type,
   other case labels (USE_ADDRESS/USE_COMPARE, per the enum used
   elsewhere in this chunk) and the braces were dropped; tokens
   unchanged.  */
2579 add_derived_ivs_candidates (struct ivopts_data
*data
)
2583 for (i
= 0; i
< n_iv_uses (data
); i
++)
2585 struct iv_use
*use
= iv_use (data
, i
);
2592 case USE_NONLINEAR_EXPR
:
2595 /* Just add the ivs based on the value of the iv used here. */
2596 add_iv_value_candidates (data
, use
->iv
, use
);
2605 /* Record important candidates and add them to related_cands bitmaps
2609 record_important_candidates (struct ivopts_data
*data
)
2614 for (i
= 0; i
< n_iv_cands (data
); i
++)
2616 struct iv_cand
*cand
= iv_cand (data
, i
);
2618 if (cand
->important
)
2619 bitmap_set_bit (data
->important_candidates
, i
);
2622 data
->consider_all_candidates
= (n_iv_cands (data
)
2623 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2625 if (data
->consider_all_candidates
)
2627 /* We will not need "related_cands" bitmaps in this case,
2628 so release them to decrease peak memory consumption. */
2629 for (i
= 0; i
< n_iv_uses (data
); i
++)
2631 use
= iv_use (data
, i
);
2632 BITMAP_FREE (use
->related_cands
);
2637 /* Add important candidates to the related_cands bitmaps. */
2638 for (i
= 0; i
< n_iv_uses (data
); i
++)
2639 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2640 data
->important_candidates
);
2644 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2645 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2646 we allocate a simple list to every use. */
2649 alloc_use_cost_map (struct ivopts_data
*data
)
2651 unsigned i
, size
, s
;
2653 for (i
= 0; i
< n_iv_uses (data
); i
++)
2655 struct iv_use
*use
= iv_use (data
, i
);
2657 if (data
->consider_all_candidates
)
2658 size
= n_iv_cands (data
);
2661 s
= bitmap_count_bits (use
->related_cands
);
2663 /* Round up to the power of two, so that moduling by it is fast. */
2664 size
= s
? (1 << ceil_log2 (s
)) : 1;
2667 use
->n_map_members
= size
;
2668 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2672 /* Returns description of computation cost of expression whose runtime
2673 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
/* See header comment above: builds a comp_cost from RUNTIME and
   COMPLEXITY.
   NOTE(review): extraction residue -- the declaration of COST and the
   `return cost;' were dropped; tokens unchanged.  */
2676 new_cost (unsigned runtime
, unsigned complexity
)
2680 cost
.cost
= runtime
;
2681 cost
.complexity
= complexity
;
2686 /* Adds costs COST1 and COST2. */
/* See header comment above: component-wise sum of COST1 and COST2,
   accumulated into COST1 (passed by value).
   NOTE(review): extraction residue -- the `return cost1;' and braces
   were dropped; tokens unchanged.  */
2689 add_costs (comp_cost cost1
, comp_cost cost2
)
2691 cost1
.cost
+= cost2
.cost
;
2692 cost1
.complexity
+= cost2
.complexity
;
2696 /* Subtracts costs COST1 and COST2. */
/* See header comment above: component-wise difference COST1 - COST2,
   accumulated into COST1 (passed by value).
   NOTE(review): extraction residue -- the `return cost1;' and braces
   were dropped; tokens unchanged.  */
2699 sub_costs (comp_cost cost1
, comp_cost cost2
)
2701 cost1
.cost
-= cost2
.cost
;
2702 cost1
.complexity
-= cost2
.complexity
;
2707 /* Returns a negative number if COST1 < COST2, a positive number if
2708 COST1 > COST2, and 0 if COST1 = COST2. */
/* See header comment above: three-way comparison of two costs --
   primary key is .cost, with .complexity breaking ties.  */
2711 compare_costs (comp_cost cost1
, comp_cost cost2
)
/* Equal runtime cost: order by complexity.  */
2713 if (cost1
.cost
== cost2
.cost
)
2714 return cost1
.complexity
- cost2
.complexity
;
2716 return cost1
.cost
- cost2
.cost
;
2719 /* Returns true if COST is infinite. */
/* See header comment above: a cost is infinite when its runtime
   component equals the INFTY sentinel.  */
2722 infinite_cost_p (comp_cost cost
)
2724 return cost
.cost
== INFTY
;
2727 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2728 on invariants DEPENDS_ON and that the value used in expressing it
2729 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2732 set_use_iv_cost (struct ivopts_data
*data
,
2733 struct iv_use
*use
, struct iv_cand
*cand
,
2734 comp_cost cost
, bitmap depends_on
, tree value
,
2735 enum tree_code comp
, int inv_expr_id
)
2739 if (infinite_cost_p (cost
))
2741 BITMAP_FREE (depends_on
);
2745 if (data
->consider_all_candidates
)
2747 use
->cost_map
[cand
->id
].cand
= cand
;
2748 use
->cost_map
[cand
->id
].cost
= cost
;
2749 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2750 use
->cost_map
[cand
->id
].value
= value
;
2751 use
->cost_map
[cand
->id
].comp
= comp
;
2752 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2756 /* n_map_members is a power of two, so this computes modulo. */
2757 s
= cand
->id
& (use
->n_map_members
- 1);
2758 for (i
= s
; i
< use
->n_map_members
; i
++)
2759 if (!use
->cost_map
[i
].cand
)
2761 for (i
= 0; i
< s
; i
++)
2762 if (!use
->cost_map
[i
].cand
)
2768 use
->cost_map
[i
].cand
= cand
;
2769 use
->cost_map
[i
].cost
= cost
;
2770 use
->cost_map
[i
].depends_on
= depends_on
;
2771 use
->cost_map
[i
].value
= value
;
2772 use
->cost_map
[i
].comp
= comp
;
2773 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2776 /* Gets cost of (USE, CANDIDATE) pair. */
2778 static struct cost_pair
*
2779 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2780 struct iv_cand
*cand
)
2783 struct cost_pair
*ret
;
2788 if (data
->consider_all_candidates
)
2790 ret
= use
->cost_map
+ cand
->id
;
2797 /* n_map_members is a power of two, so this computes modulo. */
2798 s
= cand
->id
& (use
->n_map_members
- 1);
2799 for (i
= s
; i
< use
->n_map_members
; i
++)
2800 if (use
->cost_map
[i
].cand
== cand
)
2801 return use
->cost_map
+ i
;
2802 else if (use
->cost_map
[i
].cand
== NULL
)
2804 for (i
= 0; i
< s
; i
++)
2805 if (use
->cost_map
[i
].cand
== cand
)
2806 return use
->cost_map
+ i
;
2807 else if (use
->cost_map
[i
].cand
== NULL
)
2813 /* Returns estimate on cost of computing SEQ. */
2816 seq_cost (rtx seq
, bool speed
)
2821 for (; seq
; seq
= NEXT_INSN (seq
))
2823 set
= single_set (seq
);
2825 cost
+= set_src_cost (SET_SRC (set
), speed
);
2833 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2835 produce_memory_decl_rtl (tree obj
, int *regno
)
2837 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2838 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2842 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2844 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2845 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2846 SET_SYMBOL_REF_DECL (x
, obj
);
2847 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2848 set_mem_addr_space (x
, as
);
2849 targetm
.encode_section_info (obj
, x
, true);
2853 x
= gen_raw_REG (address_mode
, (*regno
)++);
2854 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2855 set_mem_addr_space (x
, as
);
2861 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2862 walk_tree. DATA contains the actual fake register number. */
2865 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2867 tree obj
= NULL_TREE
;
2869 int *regno
= (int *) data
;
2871 switch (TREE_CODE (*expr_p
))
2874 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2875 handled_component_p (*expr_p
);
2876 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2879 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2880 x
= produce_memory_decl_rtl (obj
, regno
);
2885 obj
= SSA_NAME_VAR (*expr_p
);
2886 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2889 if (!DECL_RTL_SET_P (obj
))
2890 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2899 if (DECL_RTL_SET_P (obj
))
2902 if (DECL_MODE (obj
) == BLKmode
)
2903 x
= produce_memory_decl_rtl (obj
, regno
);
2905 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2915 decl_rtl_to_reset
.safe_push (obj
);
2916 SET_DECL_RTL (obj
, x
);
2922 /* Determines cost of the computation of EXPR. */
2925 computation_cost (tree expr
, bool speed
)
2928 tree type
= TREE_TYPE (expr
);
2930 /* Avoid using hard regs in ways which may be unsupported. */
2931 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2932 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2933 enum node_frequency real_frequency
= node
->frequency
;
2935 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2936 crtl
->maybe_hot_insn_p
= speed
;
2937 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2939 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2942 default_rtl_profile ();
2943 node
->frequency
= real_frequency
;
2945 cost
= seq_cost (seq
, speed
);
2947 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2948 TYPE_ADDR_SPACE (type
), speed
);
2949 else if (!REG_P (rslt
))
2950 cost
+= set_src_cost (rslt
, speed
);
2955 /* Returns variable containing the value of candidate CAND at statement AT. */
2958 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2960 if (stmt_after_increment (loop
, cand
, stmt
))
2961 return cand
->var_after
;
2963 return cand
->var_before
;
2966 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2967 same precision that is at least as wide as the precision of TYPE, stores
2968 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2972 determine_common_wider_type (tree
*a
, tree
*b
)
2974 tree wider_type
= NULL
;
2976 tree atype
= TREE_TYPE (*a
);
2978 if (CONVERT_EXPR_P (*a
))
2980 suba
= TREE_OPERAND (*a
, 0);
2981 wider_type
= TREE_TYPE (suba
);
2982 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2988 if (CONVERT_EXPR_P (*b
))
2990 subb
= TREE_OPERAND (*b
, 0);
2991 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3002 /* Determines the expression by that USE is expressed from induction variable
3003 CAND at statement AT in LOOP. The expression is stored in a decomposed
3004 form into AFF. Returns false if USE cannot be expressed using CAND. */
3007 get_computation_aff (struct loop
*loop
,
3008 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3009 struct affine_tree_combination
*aff
)
3011 tree ubase
= use
->iv
->base
;
3012 tree ustep
= use
->iv
->step
;
3013 tree cbase
= cand
->iv
->base
;
3014 tree cstep
= cand
->iv
->step
, cstep_common
;
3015 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3016 tree common_type
, var
;
3018 aff_tree cbase_aff
, var_aff
;
3021 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3023 /* We do not have a precision to express the values of use. */
3027 var
= var_at_stmt (loop
, cand
, at
);
3028 uutype
= unsigned_type_for (utype
);
3030 /* If the conversion is not noop, perform it. */
3031 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3033 cstep
= fold_convert (uutype
, cstep
);
3034 cbase
= fold_convert (uutype
, cbase
);
3035 var
= fold_convert (uutype
, var
);
3038 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3041 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3042 type, we achieve better folding by computing their difference in this
3043 wider type, and cast the result to UUTYPE. We do not need to worry about
3044 overflows, as all the arithmetics will in the end be performed in UUTYPE
3046 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3048 /* use = ubase - ratio * cbase + ratio * var. */
3049 tree_to_aff_combination (ubase
, common_type
, aff
);
3050 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3051 tree_to_aff_combination (var
, uutype
, &var_aff
);
3053 /* We need to shift the value if we are after the increment. */
3054 if (stmt_after_increment (loop
, cand
, at
))
3058 if (common_type
!= uutype
)
3059 cstep_common
= fold_convert (common_type
, cstep
);
3061 cstep_common
= cstep
;
3063 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3064 aff_combination_add (&cbase_aff
, &cstep_aff
);
3067 aff_combination_scale (&cbase_aff
, -rat
);
3068 aff_combination_add (aff
, &cbase_aff
);
3069 if (common_type
!= uutype
)
3070 aff_combination_convert (aff
, uutype
);
3072 aff_combination_scale (&var_aff
, rat
);
3073 aff_combination_add (aff
, &var_aff
);
3078 /* Return the type of USE. */
3081 get_use_type (struct iv_use
*use
)
3083 tree base_type
= TREE_TYPE (use
->iv
->base
);
3086 if (use
->type
== USE_ADDRESS
)
3088 /* The base_type may be a void pointer. Create a pointer type based on
3089 the mem_ref instead. */
3090 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3100 /* Determines the expression by that USE is expressed from induction variable
3101 CAND at statement AT in LOOP. The computation is unshared. */
3104 get_computation_at (struct loop
*loop
,
3105 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3108 tree type
= get_use_type (use
);
3110 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3112 unshare_aff_combination (&aff
);
3113 return fold_convert (type
, aff_combination_to_tree (&aff
));
3116 /* Determines the expression by that USE is expressed from induction variable
3117 CAND in LOOP. The computation is unshared. */
3120 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3122 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3125 /* Adjust the cost COST for being in loop setup rather than loop body.
3126 If we're optimizing for space, the loop setup overhead is constant;
3127 if we're optimizing for speed, amortize it over the per-iteration cost. */
3129 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3133 else if (optimize_loop_for_speed_p (data
->current_loop
))
3134 return cost
/ avg_loop_niter (data
->current_loop
);
3139 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3140 validity for a memory reference accessing memory of mode MODE in
3141 address space AS. */
3145 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3148 #define MAX_RATIO 128
3149 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3150 static vec
<sbitmap
> valid_mult_list
;
3153 if (data_index
>= valid_mult_list
.length ())
3154 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3156 valid_mult
= valid_mult_list
[data_index
];
3159 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3160 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3161 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3165 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3166 bitmap_clear (valid_mult
);
3167 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3168 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3169 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3171 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3172 if (memory_address_addr_space_p (mode
, addr
, as
)
3173 || memory_address_addr_space_p (mode
, scaled
, as
))
3174 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3179 fprintf (dump_file
, " allowed multipliers:");
3180 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3181 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3182 fprintf (dump_file
, " %d", (int) i
);
3183 fprintf (dump_file
, "\n");
3184 fprintf (dump_file
, "\n");
3187 valid_mult_list
[data_index
] = valid_mult
;
3190 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3193 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3196 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3197 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3198 variable is omitted. Compute the cost for a memory reference that accesses
3199 a memory location of mode MEM_MODE in address space AS.
3201 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3202 size of MEM_MODE / RATIO) is available. To make this determination, we
3203 look at the size of the increment to be made, which is given in CSTEP.
3204 CSTEP may be zero if the step is unknown.
3205 STMT_AFTER_INC is true iff the statement we're looking at is after the
3206 increment of the original biv.
3208 TODO -- there must be some better way. This all is quite crude. */
3212 AINC_PRE_INC
, /* Pre increment. */
3213 AINC_PRE_DEC
, /* Pre decrement. */
3214 AINC_POST_INC
, /* Post increment. */
3215 AINC_POST_DEC
, /* Post decrement. */
3216 AINC_NONE
/* Also the number of auto increment types. */
3219 typedef struct address_cost_data_s
3221 HOST_WIDE_INT min_offset
, max_offset
;
3222 unsigned costs
[2][2][2][2];
3223 unsigned ainc_costs
[AINC_NONE
];
3224 } *address_cost_data
;
3228 get_address_cost (bool symbol_present
, bool var_present
,
3229 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3230 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3231 addr_space_t as
, bool speed
,
3232 bool stmt_after_inc
, bool *may_autoinc
)
3234 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3235 static vec
<address_cost_data
> address_cost_data_list
;
3236 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3237 address_cost_data data
;
3238 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3239 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3240 unsigned cost
, acost
, complexity
;
3241 enum ainc_type autoinc_type
;
3242 bool offset_p
, ratio_p
, autoinc
;
3243 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3244 unsigned HOST_WIDE_INT mask
;
3247 if (data_index
>= address_cost_data_list
.length ())
3248 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3250 data
= address_cost_data_list
[data_index
];
3254 HOST_WIDE_INT rat
, off
= 0;
3255 int old_cse_not_expected
, width
;
3256 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3257 rtx seq
, addr
, base
;
3260 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3262 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3264 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3265 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3266 width
= HOST_BITS_PER_WIDE_INT
- 1;
3267 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3269 for (i
= width
; i
>= 0; i
--)
3271 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3272 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3273 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3276 data
->min_offset
= (i
== -1? 0 : off
);
3278 for (i
= width
; i
>= 0; i
--)
3280 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3281 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3282 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3287 data
->max_offset
= off
;
3289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3291 fprintf (dump_file
, "get_address_cost:\n");
3292 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3293 GET_MODE_NAME (mem_mode
),
3295 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3296 GET_MODE_NAME (mem_mode
),
3301 for (i
= 2; i
<= MAX_RATIO
; i
++)
3302 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3308 /* Compute the cost of various addressing modes. */
3310 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3311 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3313 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3314 || USE_STORE_PRE_DECREMENT (mem_mode
))
3316 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3317 has_predec
[mem_mode
]
3318 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3320 if (has_predec
[mem_mode
])
3321 data
->ainc_costs
[AINC_PRE_DEC
]
3322 = address_cost (addr
, mem_mode
, as
, speed
);
3324 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3325 || USE_STORE_POST_DECREMENT (mem_mode
))
3327 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3328 has_postdec
[mem_mode
]
3329 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3331 if (has_postdec
[mem_mode
])
3332 data
->ainc_costs
[AINC_POST_DEC
]
3333 = address_cost (addr
, mem_mode
, as
, speed
);
3335 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3336 || USE_STORE_PRE_DECREMENT (mem_mode
))
3338 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3339 has_preinc
[mem_mode
]
3340 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3342 if (has_preinc
[mem_mode
])
3343 data
->ainc_costs
[AINC_PRE_INC
]
3344 = address_cost (addr
, mem_mode
, as
, speed
);
3346 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3347 || USE_STORE_POST_INCREMENT (mem_mode
))
3349 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3350 has_postinc
[mem_mode
]
3351 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3353 if (has_postinc
[mem_mode
])
3354 data
->ainc_costs
[AINC_POST_INC
]
3355 = address_cost (addr
, mem_mode
, as
, speed
);
3357 for (i
= 0; i
< 16; i
++)
3360 var_p
= (i
>> 1) & 1;
3361 off_p
= (i
>> 2) & 1;
3362 rat_p
= (i
>> 3) & 1;
3366 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3367 gen_int_mode (rat
, address_mode
));
3370 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3374 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3375 /* ??? We can run into trouble with some backends by presenting
3376 it with symbols which haven't been properly passed through
3377 targetm.encode_section_info. By setting the local bit, we
3378 enhance the probability of things working. */
3379 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3382 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3384 (PLUS
, address_mode
, base
,
3385 gen_int_mode (off
, address_mode
)));
3388 base
= gen_int_mode (off
, address_mode
);
3393 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3396 /* To avoid splitting addressing modes, pretend that no cse will
3398 old_cse_not_expected
= cse_not_expected
;
3399 cse_not_expected
= true;
3400 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3401 cse_not_expected
= old_cse_not_expected
;
3405 acost
= seq_cost (seq
, speed
);
3406 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3410 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3413 /* On some targets, it is quite expensive to load symbol to a register,
3414 which makes addresses that contain symbols look much more expensive.
3415 However, the symbol will have to be loaded in any case before the
3416 loop (and quite likely we have it in register already), so it does not
3417 make much sense to penalize them too heavily. So make some final
3418 tweaks for the SYMBOL_PRESENT modes:
3420 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3421 var is cheaper, use this mode with small penalty.
3422 If VAR_PRESENT is true, try whether the mode with
3423 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3424 if this is the case, use it. */
3425 add_c
= add_cost (speed
, address_mode
);
3426 for (i
= 0; i
< 8; i
++)
3429 off_p
= (i
>> 1) & 1;
3430 rat_p
= (i
>> 2) & 1;
3432 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3436 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3437 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3440 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3442 fprintf (dump_file
, "Address costs:\n");
3444 for (i
= 0; i
< 16; i
++)
3447 var_p
= (i
>> 1) & 1;
3448 off_p
= (i
>> 2) & 1;
3449 rat_p
= (i
>> 3) & 1;
3451 fprintf (dump_file
, " ");
3453 fprintf (dump_file
, "sym + ");
3455 fprintf (dump_file
, "var + ");
3457 fprintf (dump_file
, "cst + ");
3459 fprintf (dump_file
, "rat * ");
3461 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3462 fprintf (dump_file
, "index costs %d\n", acost
);
3464 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3465 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3466 fprintf (dump_file
, " May include autoinc/dec\n");
3467 fprintf (dump_file
, "\n");
3470 address_cost_data_list
[data_index
] = data
;
3473 bits
= GET_MODE_BITSIZE (address_mode
);
3474 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3476 if ((offset
>> (bits
- 1) & 1))
3481 autoinc_type
= AINC_NONE
;
3482 msize
= GET_MODE_SIZE (mem_mode
);
3483 autoinc_offset
= offset
;
3485 autoinc_offset
+= ratio
* cstep
;
3486 if (symbol_present
|| var_present
|| ratio
!= 1)
3490 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3492 autoinc_type
= AINC_POST_INC
;
3493 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3495 autoinc_type
= AINC_POST_DEC
;
3496 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3498 autoinc_type
= AINC_PRE_INC
;
3499 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3501 autoinc_type
= AINC_PRE_DEC
;
3503 if (autoinc_type
!= AINC_NONE
)
3508 offset_p
= (s_offset
!= 0
3509 && data
->min_offset
<= s_offset
3510 && s_offset
<= data
->max_offset
);
3511 ratio_p
= (ratio
!= 1
3512 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3514 if (ratio
!= 1 && !ratio_p
)
3515 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3517 if (s_offset
&& !offset_p
&& !symbol_present
)
3518 cost
+= add_cost (speed
, address_mode
);
3521 *may_autoinc
= autoinc
;
3523 acost
= data
->ainc_costs
[autoinc_type
];
3525 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3526 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3527 return new_cost (cost
+ acost
, complexity
);
3530 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3531 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3532 calculating the operands of EXPR. Returns true if successful, and returns
3533 the cost in COST. */
3536 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3537 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3540 tree op1
= TREE_OPERAND (expr
, 1);
3541 tree cst
= TREE_OPERAND (mult
, 1);
3542 tree multop
= TREE_OPERAND (mult
, 0);
3543 int m
= exact_log2 (int_cst_value (cst
));
3544 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3546 bool equal_p
= false;
3548 if (!(m
>= 0 && m
< maxm
))
3551 if (operand_equal_p (op1
, mult
, 0))
3554 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3555 ? shiftadd_cost (speed
, mode
, m
)
3557 ? shiftsub1_cost (speed
, mode
, m
)
3558 : shiftsub0_cost (speed
, mode
, m
)));
3559 res
= new_cost (sa_cost
, 0);
3560 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3562 STRIP_NOPS (multop
);
3563 if (!is_gimple_val (multop
))
3564 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3570 /* Estimates cost of forcing expression EXPR into a variable. */
3573 force_expr_to_var_cost (tree expr
, bool speed
)
3575 static bool costs_initialized
= false;
3576 static unsigned integer_cost
[2];
3577 static unsigned symbol_cost
[2];
3578 static unsigned address_cost
[2];
3580 comp_cost cost0
, cost1
, cost
;
3581 enum machine_mode mode
;
3583 if (!costs_initialized
)
3585 tree type
= build_pointer_type (integer_type_node
);
3590 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3591 TREE_STATIC (var
) = 1;
3592 x
= produce_memory_decl_rtl (var
, NULL
);
3593 SET_DECL_RTL (var
, x
);
3595 addr
= build1 (ADDR_EXPR
, type
, var
);
3598 for (i
= 0; i
< 2; i
++)
3600 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3603 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3606 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3607 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3609 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3610 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3611 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3612 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3613 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3614 fprintf (dump_file
, "\n");
3618 costs_initialized
= true;
3623 if (SSA_VAR_P (expr
))
3626 if (is_gimple_min_invariant (expr
))
3628 if (TREE_CODE (expr
) == INTEGER_CST
)
3629 return new_cost (integer_cost
[speed
], 0);
3631 if (TREE_CODE (expr
) == ADDR_EXPR
)
3633 tree obj
= TREE_OPERAND (expr
, 0);
3635 if (TREE_CODE (obj
) == VAR_DECL
3636 || TREE_CODE (obj
) == PARM_DECL
3637 || TREE_CODE (obj
) == RESULT_DECL
)
3638 return new_cost (symbol_cost
[speed
], 0);
3641 return new_cost (address_cost
[speed
], 0);
3644 switch (TREE_CODE (expr
))
3646 case POINTER_PLUS_EXPR
:
3650 op0
= TREE_OPERAND (expr
, 0);
3651 op1
= TREE_OPERAND (expr
, 1);
3658 op0
= TREE_OPERAND (expr
, 0);
3664 /* Just an arbitrary value, FIXME. */
3665 return new_cost (target_spill_cost
[speed
], 0);
3668 if (op0
== NULL_TREE
3669 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3672 cost0
= force_expr_to_var_cost (op0
, speed
);
3674 if (op1
== NULL_TREE
3675 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3678 cost1
= force_expr_to_var_cost (op1
, speed
);
3680 mode
= TYPE_MODE (TREE_TYPE (expr
));
3681 switch (TREE_CODE (expr
))
3683 case POINTER_PLUS_EXPR
:
3687 cost
= new_cost (add_cost (speed
, mode
), 0);
3688 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3690 tree mult
= NULL_TREE
;
3692 if (TREE_CODE (op1
) == MULT_EXPR
)
3694 else if (TREE_CODE (op0
) == MULT_EXPR
)
3697 if (mult
!= NULL_TREE
3698 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3699 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3707 tree inner_mode
, outer_mode
;
3708 outer_mode
= TREE_TYPE (expr
);
3709 inner_mode
= TREE_TYPE (op0
);
3710 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3711 TYPE_MODE (inner_mode
), speed
), 0);
3716 if (cst_and_fits_in_hwi (op0
))
3717 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3719 else if (cst_and_fits_in_hwi (op1
))
3720 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3723 return new_cost (target_spill_cost
[speed
], 0);
3730 cost
= add_costs (cost
, cost0
);
3731 cost
= add_costs (cost
, cost1
);
3733 /* Bound the cost by target_spill_cost. The parts of complicated
3734 computations often are either loop invariant or at least can
3735 be shared between several iv uses, so letting this grow without
3736 limits would not give reasonable results. */
3737 if (cost
.cost
> (int) target_spill_cost
[speed
])
3738 cost
.cost
= target_spill_cost
[speed
];
3743 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3744 invariants the computation depends on. */
3747 force_var_cost (struct ivopts_data
*data
,
3748 tree expr
, bitmap
*depends_on
)
3752 fd_ivopts_data
= data
;
3753 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3756 return force_expr_to_var_cost (expr
, data
->speed
);
3759 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3760 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3761 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3762 invariants the computation depends on. */
3765 split_address_cost (struct ivopts_data
*data
,
3766 tree addr
, bool *symbol_present
, bool *var_present
,
3767 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3770 HOST_WIDE_INT bitsize
;
3771 HOST_WIDE_INT bitpos
;
3773 enum machine_mode mode
;
3774 int unsignedp
, volatilep
;
3776 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3777 &unsignedp
, &volatilep
, false);
3780 || bitpos
% BITS_PER_UNIT
!= 0
3781 || TREE_CODE (core
) != VAR_DECL
)
3783 *symbol_present
= false;
3784 *var_present
= true;
3785 fd_ivopts_data
= data
;
3786 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3787 return new_cost (target_spill_cost
[data
->speed
], 0);
3790 *offset
+= bitpos
/ BITS_PER_UNIT
;
3791 if (TREE_STATIC (core
)
3792 || DECL_EXTERNAL (core
))
3794 *symbol_present
= true;
3795 *var_present
= false;
3799 *symbol_present
= false;
3800 *var_present
= true;
3804 /* Estimates cost of expressing difference of addresses E1 - E2 as
3805 var + symbol + offset. The value of offset is added to OFFSET,
3806 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3807 part is missing. DEPENDS_ON is a set of the invariants the computation
3811 ptr_difference_cost (struct ivopts_data
*data
,
3812 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3813 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3815 HOST_WIDE_INT diff
= 0;
3816 aff_tree aff_e1
, aff_e2
;
3819 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3821 if (ptr_difference_const (e1
, e2
, &diff
))
3824 *symbol_present
= false;
3825 *var_present
= false;
3829 if (integer_zerop (e2
))
3830 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3831 symbol_present
, var_present
, offset
, depends_on
);
3833 *symbol_present
= false;
3834 *var_present
= true;
3836 type
= signed_type_for (TREE_TYPE (e1
));
3837 tree_to_aff_combination (e1
, type
, &aff_e1
);
3838 tree_to_aff_combination (e2
, type
, &aff_e2
);
3839 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3840 aff_combination_add (&aff_e1
, &aff_e2
);
3842 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3845 /* Estimates cost of expressing difference E1 - E2 as
3846 var + symbol + offset. The value of offset is added to OFFSET,
3847 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3848 part is missing. DEPENDS_ON is a set of the invariants the computation
3852 difference_cost (struct ivopts_data
*data
,
3853 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3854 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3856 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3857 unsigned HOST_WIDE_INT off1
, off2
;
3858 aff_tree aff_e1
, aff_e2
;
3861 e1
= strip_offset (e1
, &off1
);
3862 e2
= strip_offset (e2
, &off2
);
3863 *offset
+= off1
- off2
;
3868 if (TREE_CODE (e1
) == ADDR_EXPR
)
3869 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3870 offset
, depends_on
);
3871 *symbol_present
= false;
3873 if (operand_equal_p (e1
, e2
, 0))
3875 *var_present
= false;
3879 *var_present
= true;
3881 if (integer_zerop (e2
))
3882 return force_var_cost (data
, e1
, depends_on
);
3884 if (integer_zerop (e1
))
3886 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3887 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3891 type
= signed_type_for (TREE_TYPE (e1
));
3892 tree_to_aff_combination (e1
, type
, &aff_e1
);
3893 tree_to_aff_combination (e2
, type
, &aff_e2
);
3894 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3895 aff_combination_add (&aff_e1
, &aff_e2
);
3897 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3900 /* Returns true if AFF1 and AFF2 are identical. */
3903 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3907 if (aff1
->n
!= aff2
->n
)
3910 for (i
= 0; i
< aff1
->n
; i
++)
3912 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3915 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3921 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3924 get_expr_id (struct ivopts_data
*data
, tree expr
)
3926 struct iv_inv_expr_ent ent
;
3927 struct iv_inv_expr_ent
**slot
;
3930 ent
.hash
= iterative_hash_expr (expr
, 0);
3931 slot
= data
->inv_expr_tab
.find_slot (&ent
, INSERT
);
3935 *slot
= XNEW (struct iv_inv_expr_ent
);
3936 (*slot
)->expr
= expr
;
3937 (*slot
)->hash
= ent
.hash
;
3938 (*slot
)->id
= data
->inv_expr_id
++;
3942 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3943 requires a new compiler generated temporary. Returns -1 otherwise.
3944 ADDRESS_P is a flag indicating if the expression is for address
3948 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3949 tree cbase
, HOST_WIDE_INT ratio
,
3952 aff_tree ubase_aff
, cbase_aff
;
3960 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3961 && (TREE_CODE (cbase
) == INTEGER_CST
))
3964 /* Strips the constant part. */
3965 if (TREE_CODE (ubase
) == PLUS_EXPR
3966 || TREE_CODE (ubase
) == MINUS_EXPR
3967 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3969 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3970 ubase
= TREE_OPERAND (ubase
, 0);
3973 /* Strips the constant part. */
3974 if (TREE_CODE (cbase
) == PLUS_EXPR
3975 || TREE_CODE (cbase
) == MINUS_EXPR
3976 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3978 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3979 cbase
= TREE_OPERAND (cbase
, 0);
3984 if (((TREE_CODE (ubase
) == SSA_NAME
)
3985 || (TREE_CODE (ubase
) == ADDR_EXPR
3986 && is_gimple_min_invariant (ubase
)))
3987 && (TREE_CODE (cbase
) == INTEGER_CST
))
3990 if (((TREE_CODE (cbase
) == SSA_NAME
)
3991 || (TREE_CODE (cbase
) == ADDR_EXPR
3992 && is_gimple_min_invariant (cbase
)))
3993 && (TREE_CODE (ubase
) == INTEGER_CST
))
3999 if (operand_equal_p (ubase
, cbase
, 0))
4002 if (TREE_CODE (ubase
) == ADDR_EXPR
4003 && TREE_CODE (cbase
) == ADDR_EXPR
)
4007 usym
= TREE_OPERAND (ubase
, 0);
4008 csym
= TREE_OPERAND (cbase
, 0);
4009 if (TREE_CODE (usym
) == ARRAY_REF
)
4011 tree ind
= TREE_OPERAND (usym
, 1);
4012 if (TREE_CODE (ind
) == INTEGER_CST
4013 && tree_fits_shwi_p (ind
)
4014 && TREE_INT_CST_LOW (ind
) == 0)
4015 usym
= TREE_OPERAND (usym
, 0);
4017 if (TREE_CODE (csym
) == ARRAY_REF
)
4019 tree ind
= TREE_OPERAND (csym
, 1);
4020 if (TREE_CODE (ind
) == INTEGER_CST
4021 && tree_fits_shwi_p (ind
)
4022 && TREE_INT_CST_LOW (ind
) == 0)
4023 csym
= TREE_OPERAND (csym
, 0);
4025 if (operand_equal_p (usym
, csym
, 0))
4028 /* Now do more complex comparison */
4029 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4030 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4031 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4035 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4036 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4038 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
4039 aff_combination_add (&ubase_aff
, &cbase_aff
);
4040 expr
= aff_combination_to_tree (&ubase_aff
);
4041 return get_expr_id (data
, expr
);
4046 /* Determines the cost of the computation by that USE is expressed
4047 from induction variable CAND. If ADDRESS_P is true, we just need
4048 to create an address from it, otherwise we want to get it into
4049 register. A set of invariants we depend on is stored in
4050 DEPENDS_ON. AT is the statement at that the value is computed.
4051 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4052 addressing is likely. */
4055 get_computation_cost_at (struct ivopts_data
*data
,
4056 struct iv_use
*use
, struct iv_cand
*cand
,
4057 bool address_p
, bitmap
*depends_on
, gimple at
,
4061 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4063 tree utype
= TREE_TYPE (ubase
), ctype
;
4064 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4065 HOST_WIDE_INT ratio
, aratio
;
4066 bool var_present
, symbol_present
, stmt_is_after_inc
;
4069 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4070 enum machine_mode mem_mode
= (address_p
4071 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4076 /* Only consider real candidates. */
4078 return infinite_cost
;
4080 cbase
= cand
->iv
->base
;
4081 cstep
= cand
->iv
->step
;
4082 ctype
= TREE_TYPE (cbase
);
4084 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4086 /* We do not have a precision to express the values of use. */
4087 return infinite_cost
;
4091 || (use
->iv
->base_object
4092 && cand
->iv
->base_object
4093 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4094 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4096 /* Do not try to express address of an object with computation based
4097 on address of a different object. This may cause problems in rtl
4098 level alias analysis (that does not expect this to be happening,
4099 as this is illegal in C), and would be unlikely to be useful
4101 if (use
->iv
->base_object
4102 && cand
->iv
->base_object
4103 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4104 return infinite_cost
;
4107 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4109 /* TODO -- add direct handling of this case. */
4113 /* CSTEPI is removed from the offset in case statement is after the
4114 increment. If the step is not constant, we use zero instead.
4115 This is a bit imprecise (there is the extra addition), but
4116 redundancy elimination is likely to transform the code so that
4117 it uses value of the variable before increment anyway,
4118 so it is not that much unrealistic. */
4119 if (cst_and_fits_in_hwi (cstep
))
4120 cstepi
= int_cst_value (cstep
);
4124 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4125 return infinite_cost
;
4127 if (rat
.fits_shwi ())
4128 ratio
= rat
.to_shwi ();
4130 return infinite_cost
;
4133 ctype
= TREE_TYPE (cbase
);
4135 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4137 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4138 or ratio == 1, it is better to handle this like
4140 ubase - ratio * cbase + ratio * var
4142 (also holds in the case ratio == -1, TODO. */
4144 if (cst_and_fits_in_hwi (cbase
))
4146 offset
= - ratio
* int_cst_value (cbase
);
4147 cost
= difference_cost (data
,
4148 ubase
, build_int_cst (utype
, 0),
4149 &symbol_present
, &var_present
, &offset
,
4151 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4153 else if (ratio
== 1)
4155 tree real_cbase
= cbase
;
4157 /* Check to see if any adjustment is needed. */
4158 if (cstepi
== 0 && stmt_is_after_inc
)
4160 aff_tree real_cbase_aff
;
4163 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4165 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4167 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4168 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4171 cost
= difference_cost (data
,
4173 &symbol_present
, &var_present
, &offset
,
4175 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4178 && !POINTER_TYPE_P (ctype
)
4179 && multiplier_allowed_in_address_p
4181 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4184 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4185 cost
= difference_cost (data
,
4187 &symbol_present
, &var_present
, &offset
,
4189 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4193 cost
= force_var_cost (data
, cbase
, depends_on
);
4194 cost
= add_costs (cost
,
4195 difference_cost (data
,
4196 ubase
, build_int_cst (utype
, 0),
4197 &symbol_present
, &var_present
,
4198 &offset
, depends_on
));
4199 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4200 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4206 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4207 /* Clear depends on. */
4208 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4209 bitmap_clear (*depends_on
);
4212 /* If we are after the increment, the value of the candidate is higher by
4214 if (stmt_is_after_inc
)
4215 offset
-= ratio
* cstepi
;
4217 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4218 (symbol/var1/const parts may be omitted). If we are looking for an
4219 address, find the cost of addressing this. */
4221 return add_costs (cost
,
4222 get_address_cost (symbol_present
, var_present
,
4223 offset
, ratio
, cstepi
,
4225 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4226 speed
, stmt_is_after_inc
,
4229 /* Otherwise estimate the costs for computing the expression. */
4230 if (!symbol_present
&& !var_present
&& !offset
)
4233 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4237 /* Symbol + offset should be compile-time computable so consider that they
4238 are added once to the variable, if present. */
4239 if (var_present
&& (symbol_present
|| offset
))
4240 cost
.cost
+= adjust_setup_cost (data
,
4241 add_cost (speed
, TYPE_MODE (ctype
)));
4243 /* Having offset does not affect runtime cost in case it is added to
4244 symbol, but it increases complexity. */
4248 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4250 aratio
= ratio
> 0 ? ratio
: -ratio
;
4252 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4257 *can_autoinc
= false;
4260 /* Just get the expression, expand it and measure the cost. */
4261 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4264 return infinite_cost
;
4267 comp
= build_simple_mem_ref (comp
);
4269 return new_cost (computation_cost (comp
, speed
), 0);
4273 /* Determines the cost of the computation by that USE is expressed
4274 from induction variable CAND. If ADDRESS_P is true, we just need
4275 to create an address from it, otherwise we want to get it into
4276 register. A set of invariants we depend on is stored in
4277 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4278 autoinc addressing is likely. */
4281 get_computation_cost (struct ivopts_data
*data
,
4282 struct iv_use
*use
, struct iv_cand
*cand
,
4283 bool address_p
, bitmap
*depends_on
,
4284 bool *can_autoinc
, int *inv_expr_id
)
4286 return get_computation_cost_at (data
,
4287 use
, cand
, address_p
, depends_on
, use
->stmt
,
4288 can_autoinc
, inv_expr_id
);
4291 /* Determines cost of basing replacement of USE on CAND in a generic
4295 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4296 struct iv_use
*use
, struct iv_cand
*cand
)
4300 int inv_expr_id
= -1;
4302 /* The simple case first -- if we need to express value of the preserved
4303 original biv, the cost is 0. This also prevents us from counting the
4304 cost of increment twice -- once at this use and once in the cost of
4306 if (cand
->pos
== IP_ORIGINAL
4307 && cand
->incremented_at
== use
->stmt
)
4309 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4314 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4315 NULL
, &inv_expr_id
);
4317 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4320 return !infinite_cost_p (cost
);
4323 /* Determines cost of basing replacement of USE on CAND in an address. */
4326 determine_use_iv_cost_address (struct ivopts_data
*data
,
4327 struct iv_use
*use
, struct iv_cand
*cand
)
4331 int inv_expr_id
= -1;
4332 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4333 &can_autoinc
, &inv_expr_id
);
4335 if (cand
->ainc_use
== use
)
4338 cost
.cost
-= cand
->cost_step
;
4339 /* If we generated the candidate solely for exploiting autoincrement
4340 opportunities, and it turns out it can't be used, set the cost to
4341 infinity to make sure we ignore it. */
4342 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4343 cost
= infinite_cost
;
4345 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4348 return !infinite_cost_p (cost
);
4351 /* Computes value of candidate CAND at position AT in iteration NITER, and
4352 stores it to VAL. */
4355 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4358 aff_tree step
, delta
, nit
;
4359 struct iv
*iv
= cand
->iv
;
4360 tree type
= TREE_TYPE (iv
->base
);
4361 tree steptype
= type
;
4362 if (POINTER_TYPE_P (type
))
4363 steptype
= sizetype
;
4365 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4366 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4367 aff_combination_convert (&nit
, steptype
);
4368 aff_combination_mult (&nit
, &step
, &delta
);
4369 if (stmt_after_increment (loop
, cand
, at
))
4370 aff_combination_add (&delta
, &step
);
4372 tree_to_aff_combination (iv
->base
, type
, val
);
4373 aff_combination_add (val
, &delta
);
4376 /* Returns period of induction variable iv. */
4379 iv_period (struct iv
*iv
)
4381 tree step
= iv
->step
, period
, type
;
4384 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4386 type
= unsigned_type_for (TREE_TYPE (step
));
4387 /* Period of the iv is lcm (step, type_range)/step -1,
4388 i.e., N*type_range/step - 1. Since type range is power
4389 of two, N == (step >> num_of_ending_zeros_binary (step),
4390 so the final result is
4392 (type_range >> num_of_ending_zeros_binary (step)) - 1
4395 pow2div
= num_ending_zeros (step
);
4397 period
= build_low_bits_mask (type
,
4398 (TYPE_PRECISION (type
)
4399 - tree_to_uhwi (pow2div
)));
4404 /* Returns the comparison operator used when eliminating the iv USE. */
4406 static enum tree_code
4407 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4409 struct loop
*loop
= data
->current_loop
;
4413 ex_bb
= gimple_bb (use
->stmt
);
4414 exit
= EDGE_SUCC (ex_bb
, 0);
4415 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4416 exit
= EDGE_SUCC (ex_bb
, 1);
4418 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4422 strip_wrap_conserving_type_conversions (tree exp
)
4424 while (tree_ssa_useless_type_conversion (exp
)
4425 && (nowrap_type_p (TREE_TYPE (exp
))
4426 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4427 exp
= TREE_OPERAND (exp
, 0);
4431 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4432 check for an exact match. */
4435 expr_equal_p (tree e
, tree what
)
4438 enum tree_code code
;
4440 e
= strip_wrap_conserving_type_conversions (e
);
4441 what
= strip_wrap_conserving_type_conversions (what
);
4443 code
= TREE_CODE (what
);
4444 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4447 if (operand_equal_p (e
, what
, 0))
4450 if (TREE_CODE (e
) != SSA_NAME
)
4453 stmt
= SSA_NAME_DEF_STMT (e
);
4454 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4455 || gimple_assign_rhs_code (stmt
) != code
)
4458 switch (get_gimple_rhs_class (code
))
4460 case GIMPLE_BINARY_RHS
:
4461 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4465 case GIMPLE_UNARY_RHS
:
4466 case GIMPLE_SINGLE_RHS
:
4467 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4473 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4474 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4475 calculation is performed in non-wrapping type.
4477 TODO: More generally, we could test for the situation that
4478 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4479 This would require knowing the sign of OFFSET.
4481 Also, we only look for the first addition in the computation of BASE.
4482 More complex analysis would be better, but introducing it just for
4483 this optimization seems like an overkill. */
4486 difference_cannot_overflow_p (tree base
, tree offset
)
4488 enum tree_code code
;
4491 if (!nowrap_type_p (TREE_TYPE (base
)))
4494 base
= expand_simple_operations (base
);
4496 if (TREE_CODE (base
) == SSA_NAME
)
4498 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4500 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4503 code
= gimple_assign_rhs_code (stmt
);
4504 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4507 e1
= gimple_assign_rhs1 (stmt
);
4508 e2
= gimple_assign_rhs2 (stmt
);
4512 code
= TREE_CODE (base
);
4513 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4515 e1
= TREE_OPERAND (base
, 0);
4516 e2
= TREE_OPERAND (base
, 1);
4519 /* TODO: deeper inspection may be necessary to prove the equality. */
4523 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4524 case POINTER_PLUS_EXPR
:
4525 return expr_equal_p (e2
, offset
);
4532 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4533 comparison with CAND. NITER describes the number of iterations of
4534 the loops. If successful, the comparison in COMP_P is altered accordingly.
4536 We aim to handle the following situation:
4552 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4553 We aim to optimize this to
4561 while (p < p_0 - a + b);
4563 This preserves the correctness, since the pointer arithmetics does not
4564 overflow. More precisely:
4566 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4567 overflow in computing it or the values of p.
4568 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4569 overflow. To prove this, we use the fact that p_0 = base + a. */
4572 iv_elimination_compare_lt (struct ivopts_data
*data
,
4573 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4574 struct tree_niter_desc
*niter
)
4576 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4577 struct affine_tree_combination nit
, tmpa
, tmpb
;
4578 enum tree_code comp
;
4581 /* We need to know that the candidate induction variable does not overflow.
4582 While more complex analysis may be used to prove this, for now just
4583 check that the variable appears in the original program and that it
4584 is computed in a type that guarantees no overflows. */
4585 cand_type
= TREE_TYPE (cand
->iv
->base
);
4586 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4589 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4590 the calculation of the BOUND could overflow, making the comparison
4592 if (!data
->loop_single_exit_p
)
4595 /* We need to be able to decide whether candidate is increasing or decreasing
4596 in order to choose the right comparison operator. */
4597 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4599 step
= int_cst_value (cand
->iv
->step
);
4601 /* Check that the number of iterations matches the expected pattern:
4602 a + 1 > b ? 0 : b - a - 1. */
4603 mbz
= niter
->may_be_zero
;
4604 if (TREE_CODE (mbz
) == GT_EXPR
)
4606 /* Handle a + 1 > b. */
4607 tree op0
= TREE_OPERAND (mbz
, 0);
4608 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4610 a
= TREE_OPERAND (op0
, 0);
4611 b
= TREE_OPERAND (mbz
, 1);
4616 else if (TREE_CODE (mbz
) == LT_EXPR
)
4618 tree op1
= TREE_OPERAND (mbz
, 1);
4620 /* Handle b < a + 1. */
4621 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4623 a
= TREE_OPERAND (op1
, 0);
4624 b
= TREE_OPERAND (mbz
, 0);
4632 /* Expected number of iterations is B - A - 1. Check that it matches
4633 the actual number, i.e., that B - A - NITER = 1. */
4634 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4635 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4636 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4637 aff_combination_scale (&nit
, double_int_minus_one
);
4638 aff_combination_scale (&tmpa
, double_int_minus_one
);
4639 aff_combination_add (&tmpb
, &tmpa
);
4640 aff_combination_add (&tmpb
, &nit
);
4641 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4644 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4646 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4648 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4649 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4652 /* Determine the new comparison operator. */
4653 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4654 if (*comp_p
== NE_EXPR
)
4656 else if (*comp_p
== EQ_EXPR
)
4657 *comp_p
= invert_tree_comparison (comp
, false);
4664 /* Check whether it is possible to express the condition in USE by comparison
4665 of candidate CAND. If so, store the value compared with to BOUND, and the
4666 comparison operator to COMP. */
4669 may_eliminate_iv (struct ivopts_data
*data
,
4670 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4671 enum tree_code
*comp
)
4676 struct loop
*loop
= data
->current_loop
;
4678 struct tree_niter_desc
*desc
= NULL
;
4680 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4683 /* For now works only for exits that dominate the loop latch.
4684 TODO: extend to other conditions inside loop body. */
4685 ex_bb
= gimple_bb (use
->stmt
);
4686 if (use
->stmt
!= last_stmt (ex_bb
)
4687 || gimple_code (use
->stmt
) != GIMPLE_COND
4688 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4691 exit
= EDGE_SUCC (ex_bb
, 0);
4692 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4693 exit
= EDGE_SUCC (ex_bb
, 1);
4694 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4697 desc
= niter_for_exit (data
, exit
);
4701 /* Determine whether we can use the variable to test the exit condition.
4702 This is the case iff the period of the induction variable is greater
4703 than the number of iterations for which the exit condition is true. */
4704 period
= iv_period (cand
->iv
);
4706 /* If the number of iterations is constant, compare against it directly. */
4707 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4709 /* See cand_value_at. */
4710 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4712 if (!tree_int_cst_lt (desc
->niter
, period
))
4717 if (tree_int_cst_lt (period
, desc
->niter
))
4722 /* If not, and if this is the only possible exit of the loop, see whether
4723 we can get a conservative estimate on the number of iterations of the
4724 entire loop and compare against that instead. */
4727 double_int period_value
, max_niter
;
4729 max_niter
= desc
->max
;
4730 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4731 max_niter
+= double_int_one
;
4732 period_value
= tree_to_double_int (period
);
4733 if (max_niter
.ugt (period_value
))
4735 /* See if we can take advantage of inferred loop bound information. */
4736 if (data
->loop_single_exit_p
)
4738 if (!max_loop_iterations (loop
, &max_niter
))
4740 /* The loop bound is already adjusted by adding 1. */
4741 if (max_niter
.ugt (period_value
))
4749 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4751 *bound
= aff_combination_to_tree (&bnd
);
4752 *comp
= iv_elimination_compare (data
, use
);
4754 /* It is unlikely that computing the number of iterations using division
4755 would be more profitable than keeping the original induction variable. */
4756 if (expression_expensive_p (*bound
))
4759 /* Sometimes, it is possible to handle the situation that the number of
4760 iterations may be zero unless additional assumtions by using <
4761 instead of != in the exit condition.
4763 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4764 base the exit condition on it. However, that is often too
4766 if (!integer_zerop (desc
->may_be_zero
))
4767 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4772 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4773 be copied, if is is used in the loop body and DATA->body_includes_call. */
4776 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4778 tree sbound
= bound
;
4779 STRIP_NOPS (sbound
);
4781 if (TREE_CODE (sbound
) == SSA_NAME
4782 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4783 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4784 && data
->body_includes_call
)
4785 return COSTS_N_INSNS (1);
4790 /* Determines cost of basing replacement of USE on CAND in a condition. */
4793 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4794 struct iv_use
*use
, struct iv_cand
*cand
)
4796 tree bound
= NULL_TREE
;
4798 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4799 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4801 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4802 tree
*control_var
, *bound_cst
;
4803 enum tree_code comp
= ERROR_MARK
;
4805 /* Only consider real candidates. */
4808 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4813 /* Try iv elimination. */
4814 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4816 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4817 if (elim_cost
.cost
== 0)
4818 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4819 else if (TREE_CODE (bound
) == INTEGER_CST
)
4821 /* If we replace a loop condition 'i < n' with 'p < base + n',
4822 depends_on_elim will have 'base' and 'n' set, which implies
4823 that both 'base' and 'n' will be live during the loop. More likely,
4824 'base + n' will be loop invariant, resulting in only one live value
4825 during the loop. So in that case we clear depends_on_elim and set
4826 elim_inv_expr_id instead. */
4827 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4829 elim_inv_expr_id
= get_expr_id (data
, bound
);
4830 bitmap_clear (depends_on_elim
);
4832 /* The bound is a loop invariant, so it will be only computed
4834 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4837 elim_cost
= infinite_cost
;
4839 /* Try expressing the original giv. If it is compared with an invariant,
4840 note that we cannot get rid of it. */
4841 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4845 /* When the condition is a comparison of the candidate IV against
4846 zero, prefer this IV.
4848 TODO: The constant that we're subtracting from the cost should
4849 be target-dependent. This information should be added to the
4850 target costs for each backend. */
4851 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4852 && integer_zerop (*bound_cst
)
4853 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4854 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4855 elim_cost
.cost
-= 1;
4857 express_cost
= get_computation_cost (data
, use
, cand
, false,
4858 &depends_on_express
, NULL
,
4859 &express_inv_expr_id
);
4860 fd_ivopts_data
= data
;
4861 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4863 /* Count the cost of the original bound as well. */
4864 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4865 if (bound_cost
.cost
== 0)
4866 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4867 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4868 bound_cost
.cost
= 0;
4869 express_cost
.cost
+= bound_cost
.cost
;
4871 /* Choose the better approach, preferring the eliminated IV. */
4872 if (compare_costs (elim_cost
, express_cost
) <= 0)
4875 depends_on
= depends_on_elim
;
4876 depends_on_elim
= NULL
;
4877 inv_expr_id
= elim_inv_expr_id
;
4881 cost
= express_cost
;
4882 depends_on
= depends_on_express
;
4883 depends_on_express
= NULL
;
4886 inv_expr_id
= express_inv_expr_id
;
4889 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4891 if (depends_on_elim
)
4892 BITMAP_FREE (depends_on_elim
);
4893 if (depends_on_express
)
4894 BITMAP_FREE (depends_on_express
);
4896 return !infinite_cost_p (cost
);
4899 /* Determines cost of basing replacement of USE on CAND. Returns false
4900 if USE cannot be based on CAND. */
4903 determine_use_iv_cost (struct ivopts_data
*data
,
4904 struct iv_use
*use
, struct iv_cand
*cand
)
4908 case USE_NONLINEAR_EXPR
:
4909 return determine_use_iv_cost_generic (data
, use
, cand
);
4912 return determine_use_iv_cost_address (data
, use
, cand
);
4915 return determine_use_iv_cost_condition (data
, use
, cand
);
4922 /* Return true if get_computation_cost indicates that autoincrement is
4923 a possibility for the pair of USE and CAND, false otherwise. */
4926 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4927 struct iv_cand
*cand
)
4933 if (use
->type
!= USE_ADDRESS
)
4936 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4937 &can_autoinc
, NULL
);
4939 BITMAP_FREE (depends_on
);
4941 return !infinite_cost_p (cost
) && can_autoinc
;
4944 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4945 use that allows autoincrement, and set their AINC_USE if possible. */
4948 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4952 for (i
= 0; i
< n_iv_cands (data
); i
++)
4954 struct iv_cand
*cand
= iv_cand (data
, i
);
4955 struct iv_use
*closest_before
= NULL
;
4956 struct iv_use
*closest_after
= NULL
;
4957 if (cand
->pos
!= IP_ORIGINAL
)
4960 for (j
= 0; j
< n_iv_uses (data
); j
++)
4962 struct iv_use
*use
= iv_use (data
, j
);
4963 unsigned uid
= gimple_uid (use
->stmt
);
4965 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4968 if (uid
< gimple_uid (cand
->incremented_at
)
4969 && (closest_before
== NULL
4970 || uid
> gimple_uid (closest_before
->stmt
)))
4971 closest_before
= use
;
4973 if (uid
> gimple_uid (cand
->incremented_at
)
4974 && (closest_after
== NULL
4975 || uid
< gimple_uid (closest_after
->stmt
)))
4976 closest_after
= use
;
4979 if (closest_before
!= NULL
4980 && autoinc_possible_for_pair (data
, closest_before
, cand
))
4981 cand
->ainc_use
= closest_before
;
4982 else if (closest_after
!= NULL
4983 && autoinc_possible_for_pair (data
, closest_after
, cand
))
4984 cand
->ainc_use
= closest_after
;
4988 /* Finds the candidates for the induction variables. */
4991 find_iv_candidates (struct ivopts_data
*data
)
4993 /* Add commonly used ivs. */
4994 add_standard_iv_candidates (data
);
4996 /* Add old induction variables. */
4997 add_old_ivs_candidates (data
);
4999 /* Add induction variables derived from uses. */
5000 add_derived_ivs_candidates (data
);
5002 set_autoinc_for_original_candidates (data
);
5004 /* Record the important candidates. */
5005 record_important_candidates (data
);
5008 /* Determines costs of basing the use of the iv on an iv candidate. */
5011 determine_use_iv_costs (struct ivopts_data
*data
)
5015 struct iv_cand
*cand
;
5016 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5018 alloc_use_cost_map (data
);
5020 for (i
= 0; i
< n_iv_uses (data
); i
++)
5022 use
= iv_use (data
, i
);
5024 if (data
->consider_all_candidates
)
5026 for (j
= 0; j
< n_iv_cands (data
); j
++)
5028 cand
= iv_cand (data
, j
);
5029 determine_use_iv_cost (data
, use
, cand
);
5036 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5038 cand
= iv_cand (data
, j
);
5039 if (!determine_use_iv_cost (data
, use
, cand
))
5040 bitmap_set_bit (to_clear
, j
);
5043 /* Remove the candidates for that the cost is infinite from
5044 the list of related candidates. */
5045 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5046 bitmap_clear (to_clear
);
5050 BITMAP_FREE (to_clear
);
5052 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5054 fprintf (dump_file
, "Use-candidate costs:\n");
5056 for (i
= 0; i
< n_iv_uses (data
); i
++)
5058 use
= iv_use (data
, i
);
5060 fprintf (dump_file
, "Use %d:\n", i
);
5061 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5062 for (j
= 0; j
< use
->n_map_members
; j
++)
5064 if (!use
->cost_map
[j
].cand
5065 || infinite_cost_p (use
->cost_map
[j
].cost
))
5068 fprintf (dump_file
, " %d\t%d\t%d\t",
5069 use
->cost_map
[j
].cand
->id
,
5070 use
->cost_map
[j
].cost
.cost
,
5071 use
->cost_map
[j
].cost
.complexity
);
5072 if (use
->cost_map
[j
].depends_on
)
5073 bitmap_print (dump_file
,
5074 use
->cost_map
[j
].depends_on
, "","");
5075 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5076 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5077 fprintf (dump_file
, "\n");
5080 fprintf (dump_file
, "\n");
5082 fprintf (dump_file
, "\n");
5086 /* Determines cost of the candidate CAND. */
5089 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5091 comp_cost cost_base
;
5092 unsigned cost
, cost_step
;
5101 /* There are two costs associated with the candidate -- its increment
5102 and its initialization. The second is almost negligible for any loop
5103 that rolls enough, so we take it just very little into account. */
5105 base
= cand
->iv
->base
;
5106 cost_base
= force_var_cost (data
, base
, NULL
);
5107 /* It will be exceptional that the iv register happens to be initialized with
5108 the proper value at no cost. In general, there will at least be a regcopy
5110 if (cost_base
.cost
== 0)
5111 cost_base
.cost
= COSTS_N_INSNS (1);
5112 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5114 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5116 /* Prefer the original ivs unless we may gain something by replacing it.
5117 The reason is to make debugging simpler; so this is not relevant for
5118 artificial ivs created by other optimization passes. */
5119 if (cand
->pos
!= IP_ORIGINAL
5120 || !SSA_NAME_VAR (cand
->var_before
)
5121 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5124 /* Prefer not to insert statements into latch unless there are some
5125 already (so that we do not create unnecessary jumps). */
5126 if (cand
->pos
== IP_END
5127 && empty_block_p (ip_end_pos (data
->current_loop
)))
5131 cand
->cost_step
= cost_step
;
5134 /* Determines costs of computation of the candidates. */
5137 determine_iv_costs (struct ivopts_data
*data
)
5141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5143 fprintf (dump_file
, "Candidate costs:\n");
5144 fprintf (dump_file
, " cand\tcost\n");
5147 for (i
= 0; i
< n_iv_cands (data
); i
++)
5149 struct iv_cand
*cand
= iv_cand (data
, i
);
5151 determine_iv_cost (data
, cand
);
5153 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5154 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5157 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5158 fprintf (dump_file
, "\n");
5161 /* Calculates cost for having SIZE induction variables. */
5164 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5166 /* We add size to the cost, so that we prefer eliminating ivs
5168 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5169 data
->body_includes_call
);
5172 /* For each size of the induction variable set determine the penalty. */
5175 determine_set_costs (struct ivopts_data
*data
)
5179 gimple_stmt_iterator psi
;
5181 struct loop
*loop
= data
->current_loop
;
5184 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5186 fprintf (dump_file
, "Global costs:\n");
5187 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5188 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5189 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5190 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5194 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5196 phi
= gsi_stmt (psi
);
5197 op
= PHI_RESULT (phi
);
5199 if (virtual_operand_p (op
))
5202 if (get_iv (data
, op
))
5208 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5210 struct version_info
*info
= ver_info (data
, j
);
5212 if (info
->inv_id
&& info
->has_nonlin_use
)
5216 data
->regs_used
= n
;
5217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5218 fprintf (dump_file
, " regs_used %d\n", n
);
5220 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5222 fprintf (dump_file
, " cost for size:\n");
5223 fprintf (dump_file
, " ivs\tcost\n");
5224 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5225 fprintf (dump_file
, " %d\t%d\n", j
,
5226 ivopts_global_cost_for_size (data
, j
));
5227 fprintf (dump_file
, "\n");
5231 /* Returns true if A is a cheaper cost pair than B. */
5234 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5244 cmp
= compare_costs (a
->cost
, b
->cost
);
5251 /* In case the costs are the same, prefer the cheaper candidate. */
5252 if (a
->cand
->cost
< b
->cand
->cost
)
5259 /* Returns candidate by that USE is expressed in IVS. */
5261 static struct cost_pair
*
5262 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5264 return ivs
->cand_for_use
[use
->id
];
5267 /* Computes the cost field of IVS structure. */
5270 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5272 comp_cost cost
= ivs
->cand_use_cost
;
5274 cost
.cost
+= ivs
->cand_cost
;
5276 cost
.cost
+= ivopts_global_cost_for_size (data
,
5277 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5282 /* Remove invariants in set INVS to set IVS. */
5285 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5293 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5295 ivs
->n_invariant_uses
[iid
]--;
5296 if (ivs
->n_invariant_uses
[iid
] == 0)
5301 /* Set USE not to be expressed by any candidate in IVS. */
5304 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5307 unsigned uid
= use
->id
, cid
;
5308 struct cost_pair
*cp
;
5310 cp
= ivs
->cand_for_use
[uid
];
5316 ivs
->cand_for_use
[uid
] = NULL
;
5317 ivs
->n_cand_uses
[cid
]--;
5319 if (ivs
->n_cand_uses
[cid
] == 0)
5321 bitmap_clear_bit (ivs
->cands
, cid
);
5322 /* Do not count the pseudocandidates. */
5326 ivs
->cand_cost
-= cp
->cand
->cost
;
5328 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5331 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5333 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5335 if (cp
->inv_expr_id
!= -1)
5337 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5338 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5339 ivs
->num_used_inv_expr
--;
5341 iv_ca_recount_cost (data
, ivs
);
5344 /* Add invariants in set INVS to set IVS. */
5347 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5355 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5357 ivs
->n_invariant_uses
[iid
]++;
5358 if (ivs
->n_invariant_uses
[iid
] == 1)
5363 /* Set cost pair for USE in set IVS to CP. */
5366 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5367 struct iv_use
*use
, struct cost_pair
*cp
)
5369 unsigned uid
= use
->id
, cid
;
5371 if (ivs
->cand_for_use
[uid
] == cp
)
5374 if (ivs
->cand_for_use
[uid
])
5375 iv_ca_set_no_cp (data
, ivs
, use
);
5382 ivs
->cand_for_use
[uid
] = cp
;
5383 ivs
->n_cand_uses
[cid
]++;
5384 if (ivs
->n_cand_uses
[cid
] == 1)
5386 bitmap_set_bit (ivs
->cands
, cid
);
5387 /* Do not count the pseudocandidates. */
5391 ivs
->cand_cost
+= cp
->cand
->cost
;
5393 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5396 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5397 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5399 if (cp
->inv_expr_id
!= -1)
5401 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5402 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5403 ivs
->num_used_inv_expr
++;
5405 iv_ca_recount_cost (data
, ivs
);
5409 /* Extend set IVS by expressing USE by some of the candidates in it
5410 if possible. All important candidates will be considered
5411 if IMPORTANT_CANDIDATES is true. */
5414 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5415 struct iv_use
*use
, bool important_candidates
)
5417 struct cost_pair
*best_cp
= NULL
, *cp
;
5422 gcc_assert (ivs
->upto
>= use
->id
);
5424 if (ivs
->upto
== use
->id
)
5430 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5431 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5433 struct iv_cand
*cand
= iv_cand (data
, i
);
5435 cp
= get_use_iv_cost (data
, use
, cand
);
5437 if (cheaper_cost_pair (cp
, best_cp
))
5441 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5444 /* Get cost for assignment IVS. */
5447 iv_ca_cost (struct iv_ca
*ivs
)
5449 /* This was a conditional expression but it triggered a bug in
5452 return infinite_cost
;
5457 /* Returns true if all dependences of CP are among invariants in IVS. */
5460 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5465 if (!cp
->depends_on
)
5468 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5470 if (ivs
->n_invariant_uses
[i
] == 0)
5477 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5478 it before NEXT_CHANGE. */
5480 static struct iv_ca_delta
*
5481 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5482 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5484 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5487 change
->old_cp
= old_cp
;
5488 change
->new_cp
= new_cp
;
5489 change
->next_change
= next_change
;
5494 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5497 static struct iv_ca_delta
*
5498 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5500 struct iv_ca_delta
*last
;
5508 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5510 last
->next_change
= l2
;
5515 /* Reverse the list of changes DELTA, forming the inverse to it. */
5517 static struct iv_ca_delta
*
5518 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5520 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5521 struct cost_pair
*tmp
;
5523 for (act
= delta
; act
; act
= next
)
5525 next
= act
->next_change
;
5526 act
->next_change
= prev
;
5530 act
->old_cp
= act
->new_cp
;
5537 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5538 reverted instead. */
5541 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5542 struct iv_ca_delta
*delta
, bool forward
)
5544 struct cost_pair
*from
, *to
;
5545 struct iv_ca_delta
*act
;
5548 delta
= iv_ca_delta_reverse (delta
);
5550 for (act
= delta
; act
; act
= act
->next_change
)
5554 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5555 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5559 iv_ca_delta_reverse (delta
);
5562 /* Returns true if CAND is used in IVS. */
5565 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5567 return ivs
->n_cand_uses
[cand
->id
] > 0;
5570 /* Returns number of induction variable candidates in the set IVS. */
5573 iv_ca_n_cands (struct iv_ca
*ivs
)
5575 return ivs
->n_cands
;
5578 /* Free the list of changes DELTA. */
5581 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5583 struct iv_ca_delta
*act
, *next
;
5585 for (act
= *delta
; act
; act
= next
)
5587 next
= act
->next_change
;
5594 /* Allocates new iv candidates assignment. */
5596 static struct iv_ca
*
5597 iv_ca_new (struct ivopts_data
*data
)
5599 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5603 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5604 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5605 nw
->cands
= BITMAP_ALLOC (NULL
);
5608 nw
->cand_use_cost
= no_cost
;
5610 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5612 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5613 nw
->num_used_inv_expr
= 0;
5618 /* Free memory occupied by the set IVS. */
5621 iv_ca_free (struct iv_ca
**ivs
)
5623 free ((*ivs
)->cand_for_use
);
5624 free ((*ivs
)->n_cand_uses
);
5625 BITMAP_FREE ((*ivs
)->cands
);
5626 free ((*ivs
)->n_invariant_uses
);
5627 free ((*ivs
)->used_inv_expr
);
5632 /* Dumps IVS to FILE. */
5635 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5637 const char *pref
= " invariants ";
5639 comp_cost cost
= iv_ca_cost (ivs
);
5641 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5642 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5643 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5644 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5646 for (i
= 0; i
< ivs
->upto
; i
++)
5648 struct iv_use
*use
= iv_use (data
, i
);
5649 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5651 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5652 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5654 fprintf (file
, " use:%d --> ??\n", use
->id
);
5657 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5658 if (ivs
->n_invariant_uses
[i
])
5660 fprintf (file
, "%s%d", pref
, i
);
5663 fprintf (file
, "\n\n");
5666 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5667 new set, and store differences in DELTA. Number of induction variables
5668 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5669 the function will try to find a solution with mimimal iv candidates. */
5672 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5673 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5674 unsigned *n_ivs
, bool min_ncand
)
5679 struct cost_pair
*old_cp
, *new_cp
;
5682 for (i
= 0; i
< ivs
->upto
; i
++)
5684 use
= iv_use (data
, i
);
5685 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5688 && old_cp
->cand
== cand
)
5691 new_cp
= get_use_iv_cost (data
, use
, cand
);
5695 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5698 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5701 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5704 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5705 cost
= iv_ca_cost (ivs
);
5707 *n_ivs
= iv_ca_n_cands (ivs
);
5708 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5713 /* Try narrowing set IVS by removing CAND. Return the cost of
5714 the new set and store the differences in DELTA. */
5717 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5718 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5722 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5724 struct iv_cand
*cnd
;
5728 for (i
= 0; i
< n_iv_uses (data
); i
++)
5730 use
= iv_use (data
, i
);
5732 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5733 if (old_cp
->cand
!= cand
)
5738 if (data
->consider_all_candidates
)
5740 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5745 cnd
= iv_cand (data
, ci
);
5747 cp
= get_use_iv_cost (data
, use
, cnd
);
5751 if (!iv_ca_has_deps (ivs
, cp
))
5754 if (!cheaper_cost_pair (cp
, new_cp
))
5762 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5767 cnd
= iv_cand (data
, ci
);
5769 cp
= get_use_iv_cost (data
, use
, cnd
);
5772 if (!iv_ca_has_deps (ivs
, cp
))
5775 if (!cheaper_cost_pair (cp
, new_cp
))
5784 iv_ca_delta_free (delta
);
5785 return infinite_cost
;
5788 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5791 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5792 cost
= iv_ca_cost (ivs
);
5793 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5798 /* Try optimizing the set of candidates IVS by removing candidates different
5799 from to EXCEPT_CAND from it. Return cost of the new set, and store
5800 differences in DELTA. */
5803 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5804 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5807 struct iv_ca_delta
*act_delta
, *best_delta
;
5809 comp_cost best_cost
, acost
;
5810 struct iv_cand
*cand
;
5813 best_cost
= iv_ca_cost (ivs
);
5815 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5817 cand
= iv_cand (data
, i
);
5819 if (cand
== except_cand
)
5822 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5824 if (compare_costs (acost
, best_cost
) < 0)
5827 iv_ca_delta_free (&best_delta
);
5828 best_delta
= act_delta
;
5831 iv_ca_delta_free (&act_delta
);
5840 /* Recurse to possibly remove other unnecessary ivs. */
5841 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5842 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5843 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5844 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5848 /* Tries to extend the sets IVS in the best possible way in order
5849 to express the USE. If ORIGINALP is true, prefer candidates from
5850 the original set of IVs, otherwise favor important candidates not
5851 based on any memory object. */
5854 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5855 struct iv_use
*use
, bool originalp
)
5857 comp_cost best_cost
, act_cost
;
5860 struct iv_cand
*cand
;
5861 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5862 struct cost_pair
*cp
;
5864 iv_ca_add_use (data
, ivs
, use
, false);
5865 best_cost
= iv_ca_cost (ivs
);
5867 cp
= iv_ca_cand_for_use (ivs
, use
);
5872 iv_ca_add_use (data
, ivs
, use
, true);
5873 best_cost
= iv_ca_cost (ivs
);
5874 cp
= iv_ca_cand_for_use (ivs
, use
);
5878 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5879 iv_ca_set_no_cp (data
, ivs
, use
);
5882 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5883 first try important candidates not based on any memory object. Only if
5884 this fails, try the specific ones. Rationale -- in loops with many
5885 variables the best choice often is to use just one generic biv. If we
5886 added here many ivs specific to the uses, the optimization algorithm later
5887 would be likely to get stuck in a local minimum, thus causing us to create
5888 too many ivs. The approach from few ivs to more seems more likely to be
5889 successful -- starting from few ivs, replacing an expensive use by a
5890 specific iv should always be a win. */
5891 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5893 cand
= iv_cand (data
, i
);
5895 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5898 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5901 if (iv_ca_cand_used_p (ivs
, cand
))
5904 cp
= get_use_iv_cost (data
, use
, cand
);
5908 iv_ca_set_cp (data
, ivs
, use
, cp
);
5909 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5911 iv_ca_set_no_cp (data
, ivs
, use
);
5912 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5914 if (compare_costs (act_cost
, best_cost
) < 0)
5916 best_cost
= act_cost
;
5918 iv_ca_delta_free (&best_delta
);
5919 best_delta
= act_delta
;
5922 iv_ca_delta_free (&act_delta
);
5925 if (infinite_cost_p (best_cost
))
5927 for (i
= 0; i
< use
->n_map_members
; i
++)
5929 cp
= use
->cost_map
+ i
;
5934 /* Already tried this. */
5935 if (cand
->important
)
5937 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5939 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5943 if (iv_ca_cand_used_p (ivs
, cand
))
5947 iv_ca_set_cp (data
, ivs
, use
, cp
);
5948 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5949 iv_ca_set_no_cp (data
, ivs
, use
);
5950 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5953 if (compare_costs (act_cost
, best_cost
) < 0)
5955 best_cost
= act_cost
;
5958 iv_ca_delta_free (&best_delta
);
5959 best_delta
= act_delta
;
5962 iv_ca_delta_free (&act_delta
);
5966 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5967 iv_ca_delta_free (&best_delta
);
5969 return !infinite_cost_p (best_cost
);
5972 /* Finds an initial assignment of candidates to uses. */
5974 static struct iv_ca
*
5975 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5977 struct iv_ca
*ivs
= iv_ca_new (data
);
5980 for (i
= 0; i
< n_iv_uses (data
); i
++)
5981 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5990 /* Tries to improve set of induction variables IVS. */
5993 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5996 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5997 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5998 struct iv_cand
*cand
;
6000 /* Try extending the set of induction variables by one. */
6001 for (i
= 0; i
< n_iv_cands (data
); i
++)
6003 cand
= iv_cand (data
, i
);
6005 if (iv_ca_cand_used_p (ivs
, cand
))
6008 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6012 /* If we successfully added the candidate and the set is small enough,
6013 try optimizing it by removing other candidates. */
6014 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6016 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6017 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6018 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6019 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6022 if (compare_costs (acost
, best_cost
) < 0)
6025 iv_ca_delta_free (&best_delta
);
6026 best_delta
= act_delta
;
6029 iv_ca_delta_free (&act_delta
);
6034 /* Try removing the candidates from the set instead. */
6035 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6037 /* Nothing more we can do. */
6042 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6043 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6044 iv_ca_delta_free (&best_delta
);
6048 /* Attempts to find the optimal set of induction variables. We do simple
6049 greedy heuristic -- we try to replace at most one candidate in the selected
6050 solution and remove the unused ivs while this improves the cost. */
6052 static struct iv_ca
*
6053 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6057 /* Get the initial solution. */
6058 set
= get_initial_solution (data
, originalp
);
6061 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6062 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6066 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6068 fprintf (dump_file
, "Initial set of candidates:\n");
6069 iv_ca_dump (data
, dump_file
, set
);
6072 while (try_improve_iv_set (data
, set
))
6074 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6076 fprintf (dump_file
, "Improved to:\n");
6077 iv_ca_dump (data
, dump_file
, set
);
6084 static struct iv_ca
*
6085 find_optimal_iv_set (struct ivopts_data
*data
)
6088 struct iv_ca
*set
, *origset
;
6090 comp_cost cost
, origcost
;
6092 /* Determine the cost based on a strategy that starts with original IVs,
6093 and try again using a strategy that prefers candidates not based
6095 origset
= find_optimal_iv_set_1 (data
, true);
6096 set
= find_optimal_iv_set_1 (data
, false);
6098 if (!origset
&& !set
)
6101 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6102 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6104 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6106 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6107 origcost
.cost
, origcost
.complexity
);
6108 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6109 cost
.cost
, cost
.complexity
);
6112 /* Choose the one with the best cost. */
6113 if (compare_costs (origcost
, cost
) <= 0)
6120 iv_ca_free (&origset
);
6122 for (i
= 0; i
< n_iv_uses (data
); i
++)
6124 use
= iv_use (data
, i
);
6125 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6131 /* Creates a new induction variable corresponding to CAND. */
6134 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6136 gimple_stmt_iterator incr_pos
;
6146 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6150 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6158 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6162 /* Mark that the iv is preserved. */
6163 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6164 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6166 /* Rewrite the increment so that it uses var_before directly. */
6167 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6171 gimple_add_tmp_var (cand
->var_before
);
6173 base
= unshare_expr (cand
->iv
->base
);
6175 create_iv (base
, unshare_expr (cand
->iv
->step
),
6176 cand
->var_before
, data
->current_loop
,
6177 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6180 /* Creates new induction variables described in SET. */
6183 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6186 struct iv_cand
*cand
;
6189 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6191 cand
= iv_cand (data
, i
);
6192 create_new_iv (data
, cand
);
6195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6197 fprintf (dump_file
, "\nSelected IV set: \n");
6198 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6200 cand
= iv_cand (data
, i
);
6201 dump_cand (dump_file
, cand
);
6203 fprintf (dump_file
, "\n");
6207 /* Rewrites USE (definition of iv used in a nonlinear expression)
6208 using candidate CAND. */
6211 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6212 struct iv_use
*use
, struct iv_cand
*cand
)
6217 gimple_stmt_iterator bsi
;
6219 /* An important special case -- if we are asked to express value of
6220 the original iv by itself, just exit; there is no need to
6221 introduce a new computation (that might also need casting the
6222 variable to unsigned and back). */
6223 if (cand
->pos
== IP_ORIGINAL
6224 && cand
->incremented_at
== use
->stmt
)
6226 enum tree_code stmt_code
;
6228 gcc_assert (is_gimple_assign (use
->stmt
));
6229 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6231 /* Check whether we may leave the computation unchanged.
6232 This is the case only if it does not rely on other
6233 computations in the loop -- otherwise, the computation
6234 we rely upon may be removed in remove_unused_ivs,
6235 thus leading to ICE. */
6236 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6237 if (stmt_code
== PLUS_EXPR
6238 || stmt_code
== MINUS_EXPR
6239 || stmt_code
== POINTER_PLUS_EXPR
)
6241 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6242 op
= gimple_assign_rhs2 (use
->stmt
);
6243 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6244 op
= gimple_assign_rhs1 (use
->stmt
);
6251 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6255 comp
= get_computation (data
->current_loop
, use
, cand
);
6256 gcc_assert (comp
!= NULL_TREE
);
6258 switch (gimple_code (use
->stmt
))
6261 tgt
= PHI_RESULT (use
->stmt
);
6263 /* If we should keep the biv, do not replace it. */
6264 if (name_info (data
, tgt
)->preserve_biv
)
6267 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6271 tgt
= gimple_assign_lhs (use
->stmt
);
6272 bsi
= gsi_for_stmt (use
->stmt
);
6279 if (!valid_gimple_rhs_p (comp
)
6280 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6281 /* We can't allow re-allocating the stmt as it might be pointed
6283 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6284 >= gimple_num_ops (gsi_stmt (bsi
)))))
6286 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6287 true, GSI_SAME_STMT
);
6288 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6290 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6291 /* As this isn't a plain copy we have to reset alignment
6293 if (SSA_NAME_PTR_INFO (comp
))
6294 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6298 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6300 ass
= gimple_build_assign (tgt
, comp
);
6301 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6303 bsi
= gsi_for_stmt (use
->stmt
);
6304 remove_phi_node (&bsi
, false);
6308 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6309 use
->stmt
= gsi_stmt (bsi
);
6313 /* Performs a peephole optimization to reorder the iv update statement with
6314 a mem ref to enable instruction combining in later phases. The mem ref uses
6315 the iv value before the update, so the reordering transformation requires
6316 adjustment of the offset. CAND is the selected IV_CAND.
6320 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6328 directly propagating t over to (1) will introduce overlapping live range
6329 thus increase register pressure. This peephole transform it into:
6333 t = MEM_REF (base, iv2, 8, 8);
6340 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6343 gimple iv_update
, stmt
;
6345 gimple_stmt_iterator gsi
, gsi_iv
;
6347 if (cand
->pos
!= IP_NORMAL
)
6350 var_after
= cand
->var_after
;
6351 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6353 bb
= gimple_bb (iv_update
);
6354 gsi
= gsi_last_nondebug_bb (bb
);
6355 stmt
= gsi_stmt (gsi
);
6357 /* Only handle conditional statement for now. */
6358 if (gimple_code (stmt
) != GIMPLE_COND
)
6361 gsi_prev_nondebug (&gsi
);
6362 stmt
= gsi_stmt (gsi
);
6363 if (stmt
!= iv_update
)
6366 gsi_prev_nondebug (&gsi
);
6367 if (gsi_end_p (gsi
))
6370 stmt
= gsi_stmt (gsi
);
6371 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6374 if (stmt
!= use
->stmt
)
6377 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6380 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6382 fprintf (dump_file
, "Reordering \n");
6383 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6384 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6385 fprintf (dump_file
, "\n");
6388 gsi
= gsi_for_stmt (use
->stmt
);
6389 gsi_iv
= gsi_for_stmt (iv_update
);
6390 gsi_move_before (&gsi_iv
, &gsi
);
6392 cand
->pos
= IP_BEFORE_USE
;
6393 cand
->incremented_at
= use
->stmt
;
6396 /* Rewrites USE (address that is an iv) using candidate CAND. */
6399 rewrite_use_address (struct ivopts_data
*data
,
6400 struct iv_use
*use
, struct iv_cand
*cand
)
6403 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6404 tree base_hint
= NULL_TREE
;
6408 adjust_iv_update_pos (cand
, use
);
6409 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6411 unshare_aff_combination (&aff
);
6413 /* To avoid undefined overflow problems, all IV candidates use unsigned
6414 integer types. The drawback is that this makes it impossible for
6415 create_mem_ref to distinguish an IV that is based on a memory object
6416 from one that represents simply an offset.
6418 To work around this problem, we pass a hint to create_mem_ref that
6419 indicates which variable (if any) in aff is an IV based on a memory
6420 object. Note that we only consider the candidate. If this is not
6421 based on an object, the base of the reference is in some subexpression
6422 of the use -- but these will use pointer types, so they are recognized
6423 by the create_mem_ref heuristics anyway. */
6424 if (cand
->iv
->base_object
)
6425 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6427 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6428 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6429 reference_alias_ptr_type (*use
->op_p
),
6430 iv
, base_hint
, data
->speed
);
6431 copy_ref_info (ref
, *use
->op_p
);
6435 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6439 rewrite_use_compare (struct ivopts_data
*data
,
6440 struct iv_use
*use
, struct iv_cand
*cand
)
6442 tree comp
, *var_p
, op
, bound
;
6443 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6444 enum tree_code compare
;
6445 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6451 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6452 tree var_type
= TREE_TYPE (var
);
6455 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6457 fprintf (dump_file
, "Replacing exit test: ");
6458 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6461 bound
= unshare_expr (fold_convert (var_type
, bound
));
6462 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6464 gsi_insert_seq_on_edge_immediate (
6465 loop_preheader_edge (data
->current_loop
),
6468 gimple_cond_set_lhs (use
->stmt
, var
);
6469 gimple_cond_set_code (use
->stmt
, compare
);
6470 gimple_cond_set_rhs (use
->stmt
, op
);
6474 /* The induction variable elimination failed; just express the original
6476 comp
= get_computation (data
->current_loop
, use
, cand
);
6477 gcc_assert (comp
!= NULL_TREE
);
6479 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6482 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6483 true, GSI_SAME_STMT
);
6486 /* Rewrites USE using candidate CAND. */
6489 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6493 case USE_NONLINEAR_EXPR
:
6494 rewrite_use_nonlinear_expr (data
, use
, cand
);
6498 rewrite_use_address (data
, use
, cand
);
6502 rewrite_use_compare (data
, use
, cand
);
6509 update_stmt (use
->stmt
);
6512 /* Rewrite the uses using the selected induction variables. */
6515 rewrite_uses (struct ivopts_data
*data
)
6518 struct iv_cand
*cand
;
6521 for (i
= 0; i
< n_iv_uses (data
); i
++)
6523 use
= iv_use (data
, i
);
6524 cand
= use
->selected
;
6527 rewrite_use (data
, use
, cand
);
6531 /* Removes the ivs that are not used after rewriting. */
6534 remove_unused_ivs (struct ivopts_data
*data
)
6538 bitmap toremove
= BITMAP_ALLOC (NULL
);
6540 /* Figure out an order in which to release SSA DEFs so that we don't
6541 release something that we'd have to propagate into a debug stmt
6543 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6545 struct version_info
*info
;
6547 info
= ver_info (data
, j
);
6549 && !integer_zerop (info
->iv
->step
)
6551 && !info
->iv
->have_use_for
6552 && !info
->preserve_biv
)
6554 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6556 tree def
= info
->iv
->ssa_name
;
6558 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6560 imm_use_iterator imm_iter
;
6561 use_operand_p use_p
;
6565 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6567 if (!gimple_debug_bind_p (stmt
))
6570 /* We just want to determine whether to do nothing
6571 (count == 0), to substitute the computed
6572 expression into a single use of the SSA DEF by
6573 itself (count == 1), or to use a debug temp
6574 because the SSA DEF is used multiple times or as
6575 part of a larger expression (count > 1). */
6577 if (gimple_debug_bind_get_value (stmt
) != def
)
6581 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6587 struct iv_use dummy_use
;
6588 struct iv_cand
*best_cand
= NULL
, *cand
;
6589 unsigned i
, best_pref
= 0, cand_pref
;
6591 memset (&dummy_use
, 0, sizeof (dummy_use
));
6592 dummy_use
.iv
= info
->iv
;
6593 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6595 cand
= iv_use (data
, i
)->selected
;
6596 if (cand
== best_cand
)
6598 cand_pref
= operand_equal_p (cand
->iv
->step
,
6602 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6603 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6606 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6608 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6611 best_pref
= cand_pref
;
6618 tree comp
= get_computation_at (data
->current_loop
,
6619 &dummy_use
, best_cand
,
6620 SSA_NAME_DEF_STMT (def
));
6626 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6627 DECL_ARTIFICIAL (vexpr
) = 1;
6628 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6629 if (SSA_NAME_VAR (def
))
6630 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6632 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6633 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6634 gimple_stmt_iterator gsi
;
6636 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6637 gsi
= gsi_after_labels (gimple_bb
6638 (SSA_NAME_DEF_STMT (def
)));
6640 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6642 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6646 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6648 if (!gimple_debug_bind_p (stmt
))
6651 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6652 SET_USE (use_p
, comp
);
6660 release_defs_bitset (toremove
);
6662 BITMAP_FREE (toremove
);
6665 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6666 for pointer_map_traverse. */
6669 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6670 void *data ATTRIBUTE_UNUSED
)
6672 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6678 /* Frees data allocated by the optimization of a single loop. */
6681 free_loop_data (struct ivopts_data
*data
)
6689 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6690 pointer_map_destroy (data
->niters
);
6691 data
->niters
= NULL
;
6694 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6696 struct version_info
*info
;
6698 info
= ver_info (data
, i
);
6701 info
->has_nonlin_use
= false;
6702 info
->preserve_biv
= false;
6705 bitmap_clear (data
->relevant
);
6706 bitmap_clear (data
->important_candidates
);
6708 for (i
= 0; i
< n_iv_uses (data
); i
++)
6710 struct iv_use
*use
= iv_use (data
, i
);
6713 BITMAP_FREE (use
->related_cands
);
6714 for (j
= 0; j
< use
->n_map_members
; j
++)
6715 if (use
->cost_map
[j
].depends_on
)
6716 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6717 free (use
->cost_map
);
6720 data
->iv_uses
.truncate (0);
6722 for (i
= 0; i
< n_iv_cands (data
); i
++)
6724 struct iv_cand
*cand
= iv_cand (data
, i
);
6727 if (cand
->depends_on
)
6728 BITMAP_FREE (cand
->depends_on
);
6731 data
->iv_candidates
.truncate (0);
6733 if (data
->version_info_size
< num_ssa_names
)
6735 data
->version_info_size
= 2 * num_ssa_names
;
6736 free (data
->version_info
);
6737 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6740 data
->max_inv_id
= 0;
6742 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6743 SET_DECL_RTL (obj
, NULL_RTX
);
6745 decl_rtl_to_reset
.truncate (0);
6747 data
->inv_expr_tab
.empty ();
6748 data
->inv_expr_id
= 0;
6751 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6755 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6757 free_loop_data (data
);
6758 free (data
->version_info
);
6759 BITMAP_FREE (data
->relevant
);
6760 BITMAP_FREE (data
->important_candidates
);
6762 decl_rtl_to_reset
.release ();
6763 data
->iv_uses
.release ();
6764 data
->iv_candidates
.release ();
6765 data
->inv_expr_tab
.dispose ();
6768 /* Returns true if the loop body BODY includes any function calls. */
6771 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6773 gimple_stmt_iterator gsi
;
6776 for (i
= 0; i
< num_nodes
; i
++)
6777 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6779 gimple stmt
= gsi_stmt (gsi
);
6780 if (is_gimple_call (stmt
)
6781 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6787 /* Optimizes the LOOP. Returns true if anything changed. */
6790 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6792 bool changed
= false;
6793 struct iv_ca
*iv_ca
;
6794 edge exit
= single_dom_exit (loop
);
6797 gcc_assert (!data
->niters
);
6798 data
->current_loop
= loop
;
6799 data
->speed
= optimize_loop_for_speed_p (loop
);
6801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6803 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6807 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6808 exit
->src
->index
, exit
->dest
->index
);
6809 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6810 fprintf (dump_file
, "\n");
6813 fprintf (dump_file
, "\n");
6816 body
= get_loop_body (loop
);
6817 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6818 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6821 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6823 /* For each ssa name determines whether it behaves as an induction variable
6825 if (!find_induction_variables (data
))
6828 /* Finds interesting uses (item 1). */
6829 find_interesting_uses (data
);
6830 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6833 /* Finds candidates for the induction variables (item 2). */
6834 find_iv_candidates (data
);
6836 /* Calculates the costs (item 3, part 1). */
6837 determine_iv_costs (data
);
6838 determine_use_iv_costs (data
);
6839 determine_set_costs (data
);
6841 /* Find the optimal set of induction variables (item 3, part 2). */
6842 iv_ca
= find_optimal_iv_set (data
);
6847 /* Create the new induction variables (item 4, part 1). */
6848 create_new_ivs (data
, iv_ca
);
6849 iv_ca_free (&iv_ca
);
6851 /* Rewrite the uses (item 4, part 2). */
6852 rewrite_uses (data
);
6854 /* Remove the ivs that are unused after rewriting. */
6855 remove_unused_ivs (data
);
6857 /* We have changed the structure of induction variables; it might happen
6858 that definitions in the scev database refer to some of them that were
6863 free_loop_data (data
);
6868 /* Main entry point. Optimizes induction variables in loops. */
6871 tree_ssa_iv_optimize (void)
6874 struct ivopts_data data
;
6877 tree_ssa_iv_optimize_init (&data
);
6879 /* Optimize the loops starting with the innermost ones. */
6880 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6883 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6885 tree_ssa_iv_optimize_loop (&data
, loop
);
6888 tree_ssa_iv_optimize_finalize (&data
);