1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
73 #include "fold-const.h"
74 #include "stor-layout.h"
76 #include "gimple-pretty-print.h"
77 #include "internal-fn.h"
80 #include "gimple-iterator.h"
81 #include "gimplify-me.h"
84 #include "tree-ssa-loop-ivopts.h"
85 #include "tree-ssa-loop-manip.h"
86 #include "tree-ssa-loop-niter.h"
87 #include "tree-ssa-loop.h"
89 #include "insn-config.h"
101 #include "tree-pass.h"
102 #include "tree-chrec.h"
103 #include "tree-scalar-evolution.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "tree-ssa-address.h"
111 #include "builtins.h"
112 #include "tree-vectorizer.h"
114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
119 /* The infinite cost. */
120 #define INFTY 10000000
122 #define AVG_LOOP_NITER(LOOP) 5
124 /* Returns the expected number of loop iterations for LOOP.
125 The average trip count is computed from profile data if it
128 static inline HOST_WIDE_INT
129 avg_loop_niter (struct loop
*loop
)
131 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
133 return AVG_LOOP_NITER (loop
);
138 /* Representation of the induction variable. */
141 tree base
; /* Initial value of the iv. */
142 tree base_object
; /* A memory object to that the induction variable points. */
143 tree step
; /* Step of the iv (constant only). */
144 tree ssa_name
; /* The ssa name with the value. */
145 unsigned use_id
; /* The identifier in the use if it is the case. */
146 bool biv_p
; /* Is it a biv? */
147 bool have_use_for
; /* Do we already have a use for it? */
148 bool no_overflow
; /* True if the iv doesn't overflow. */
151 /* Per-ssa version information (induction variable descriptions, etc.). */
154 tree name
; /* The ssa name. */
155 struct iv
*iv
; /* Induction variable description. */
156 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
158 bool preserve_biv
; /* For the original biv, whether to preserve it. */
159 unsigned inv_id
; /* Id of an invariant. */
165 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
166 USE_ADDRESS
, /* Use in an address. */
167 USE_COMPARE
/* Use is a compare. */
170 /* Cost of a computation. */
173 int cost
; /* The runtime cost. */
174 unsigned complexity
; /* The estimate of the complexity of the code for
175 the computation (in no concrete units --
176 complexity field should be larger for more
177 complex expressions and addressing modes). */
180 static const comp_cost no_cost
= {0, 0};
181 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
183 /* The candidate - cost pair. */
186 struct iv_cand
*cand
; /* The candidate. */
187 comp_cost cost
; /* The cost. */
188 bitmap depends_on
; /* The list of invariants that have to be
190 tree value
; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
193 enum tree_code comp
; /* For iv elimination, the comparison. */
194 int inv_expr_id
; /* Loop invariant expression id. */
200 unsigned id
; /* The id of the use. */
201 unsigned sub_id
; /* The id of the sub use. */
202 enum use_type type
; /* Type of the use. */
203 struct iv
*iv
; /* The induction variable it is based on. */
204 gimple stmt
; /* Statement in that it occurs. */
205 tree
*op_p
; /* The place where it occurs. */
206 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
209 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
210 struct cost_pair
*cost_map
;
211 /* The costs wrto the iv candidates. */
213 struct iv_cand
*selected
;
214 /* The selected candidate. */
216 struct iv_use
*next
; /* The next sub use. */
217 tree addr_base
; /* Base address with const offset stripped. */
218 unsigned HOST_WIDE_INT addr_offset
;
219 /* Const offset stripped from base address. */
222 /* The position where the iv is computed. */
225 IP_NORMAL
, /* At the end, just before the exit condition. */
226 IP_END
, /* At the end of the latch block. */
227 IP_BEFORE_USE
, /* Immediately before a specific use. */
228 IP_AFTER_USE
, /* Immediately after a specific use. */
229 IP_ORIGINAL
/* The original biv. */
232 /* The induction variable candidate. */
235 unsigned id
; /* The number of the candidate. */
236 bool important
; /* Whether this is an "important" candidate, i.e. such
237 that it should be considered by all uses. */
238 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
239 gimple incremented_at
;/* For original biv, the statement where it is
241 tree var_before
; /* The variable used for it before increment. */
242 tree var_after
; /* The variable used for it after increment. */
243 struct iv
*iv
; /* The value of the candidate. NULL for
244 "pseudocandidate" used to indicate the possibility
245 to replace the final value of an iv by direct
246 computation of the value. */
247 unsigned cost
; /* Cost of the candidate. */
248 unsigned cost_step
; /* Cost of the candidate's increment operation. */
249 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
250 where it is incremented. */
251 bitmap depends_on
; /* The list of invariants that are used in step of the
255 /* Loop invariant expression hashtable entry. */
256 struct iv_inv_expr_ent
263 /* The data used by the induction variable optimizations. */
265 typedef struct iv_use
*iv_use_p
;
267 typedef struct iv_cand
*iv_cand_p
;
269 /* Hashtable helpers. */
271 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
273 static inline hashval_t
hash (const iv_inv_expr_ent
*);
274 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
277 /* Hash function for loop invariant expressions. */
280 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
285 /* Hash table equality function for expressions. */
288 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
289 const iv_inv_expr_ent
*expr2
)
291 return expr1
->hash
== expr2
->hash
292 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
297 /* The currently optimized loop. */
298 struct loop
*current_loop
;
299 source_location loop_loc
;
301 /* Numbers of iterations for all exits of the current loop. */
302 hash_map
<edge
, tree_niter_desc
*> *niters
;
304 /* Number of registers used in it. */
307 /* The size of version_info array allocated. */
308 unsigned version_info_size
;
310 /* The array of information for the ssa names. */
311 struct version_info
*version_info
;
313 /* The hashtable of loop invariant expressions created
315 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
317 /* Loop invariant expression id. */
320 /* The bitmap of indices in version_info whose value was changed. */
323 /* The uses of induction variables. */
324 vec
<iv_use_p
> iv_uses
;
326 /* The candidates. */
327 vec
<iv_cand_p
> iv_candidates
;
329 /* A bitmap of important candidates. */
330 bitmap important_candidates
;
332 /* Cache used by tree_to_aff_combination_expand. */
333 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
335 /* The maximum invariant id. */
338 /* Obstack for iv structure. */
339 struct obstack iv_obstack
;
341 /* Whether to consider just related and important candidates when replacing a
343 bool consider_all_candidates
;
345 /* Are we optimizing for speed? */
348 /* Whether the loop body includes any function calls. */
349 bool body_includes_call
;
351 /* Whether the loop body can only be exited via single exit. */
352 bool loop_single_exit_p
;
355 /* An assignment of iv candidates to uses. */
359 /* The number of uses covered by the assignment. */
362 /* Number of uses that cannot be expressed by the candidates in the set. */
365 /* Candidate assigned to a use, together with the related costs. */
366 struct cost_pair
**cand_for_use
;
368 /* Number of times each candidate is used. */
369 unsigned *n_cand_uses
;
371 /* The candidates used. */
374 /* The number of candidates in the set. */
377 /* Total number of registers needed. */
380 /* Total cost of expressing uses. */
381 comp_cost cand_use_cost
;
383 /* Total cost of candidates. */
386 /* Number of times each invariant is used. */
387 unsigned *n_invariant_uses
;
389 /* The array holding the number of uses of each loop
390 invariant expressions created by ivopt. */
391 unsigned *used_inv_expr
;
393 /* The number of created loop invariants. */
394 unsigned num_used_inv_expr
;
396 /* Total cost of the assignment. */
400 /* Difference of two iv candidate assignments. */
407 /* An old assignment (for rollback purposes). */
408 struct cost_pair
*old_cp
;
410 /* A new assignment. */
411 struct cost_pair
*new_cp
;
413 /* Next change in the list. */
414 struct iv_ca_delta
*next_change
;
417 /* Bound on number of candidates below that all candidates are considered. */
419 #define CONSIDER_ALL_CANDIDATES_BOUND \
420 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
422 /* If there are more iv occurrences, we just give up (it is quite unlikely that
423 optimizing such a loop would help, and it would take ages). */
425 #define MAX_CONSIDERED_USES \
426 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
428 /* If there are at most this number of ivs in the set, try removing unnecessary
429 ivs from the set always. */
431 #define ALWAYS_PRUNE_CAND_SET_BOUND \
432 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
434 /* The list of trees for that the decl_rtl field must be reset is stored
437 static vec
<tree
> decl_rtl_to_reset
;
439 static comp_cost
force_expr_to_var_cost (tree
, bool);
441 /* Number of uses recorded in DATA. */
443 static inline unsigned
444 n_iv_uses (struct ivopts_data
*data
)
446 return data
->iv_uses
.length ();
449 /* Ith use recorded in DATA. */
451 static inline struct iv_use
*
452 iv_use (struct ivopts_data
*data
, unsigned i
)
454 return data
->iv_uses
[i
];
457 /* Number of candidates recorded in DATA. */
459 static inline unsigned
460 n_iv_cands (struct ivopts_data
*data
)
462 return data
->iv_candidates
.length ();
465 /* Ith candidate recorded in DATA. */
467 static inline struct iv_cand
*
468 iv_cand (struct ivopts_data
*data
, unsigned i
)
470 return data
->iv_candidates
[i
];
473 /* The single loop exit if it dominates the latch, NULL otherwise. */
476 single_dom_exit (struct loop
*loop
)
478 edge exit
= single_exit (loop
);
483 if (!just_once_each_iteration_p (loop
, exit
->src
))
489 /* Dumps information about the induction variable IV to FILE. */
492 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
494 if (iv
->ssa_name
&& dump_name
)
496 fprintf (file
, "ssa name ");
497 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
498 fprintf (file
, "\n");
501 fprintf (file
, " type ");
502 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
503 fprintf (file
, "\n");
507 fprintf (file
, " base ");
508 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
509 fprintf (file
, "\n");
511 fprintf (file
, " step ");
512 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
513 fprintf (file
, "\n");
517 fprintf (file
, " invariant ");
518 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
519 fprintf (file
, "\n");
524 fprintf (file
, " base object ");
525 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
526 fprintf (file
, "\n");
530 fprintf (file
, " is a biv\n");
533 /* Dumps information about the USE to FILE. */
536 dump_use (FILE *file
, struct iv_use
*use
)
538 fprintf (file
, "use %d", use
->id
);
540 fprintf (file
, ".%d", use
->sub_id
);
542 fprintf (file
, "\n");
546 case USE_NONLINEAR_EXPR
:
547 fprintf (file
, " generic\n");
551 fprintf (file
, " address\n");
555 fprintf (file
, " compare\n");
562 fprintf (file
, " in statement ");
563 print_gimple_stmt (file
, use
->stmt
, 0, 0);
564 fprintf (file
, "\n");
566 fprintf (file
, " at position ");
568 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
569 fprintf (file
, "\n");
571 dump_iv (file
, use
->iv
, false);
573 if (use
->related_cands
)
575 fprintf (file
, " related candidates ");
576 dump_bitmap (file
, use
->related_cands
);
580 /* Dumps information about the uses to FILE. */
583 dump_uses (FILE *file
, struct ivopts_data
*data
)
588 for (i
= 0; i
< n_iv_uses (data
); i
++)
590 use
= iv_use (data
, i
);
593 dump_use (file
, use
);
597 fprintf (file
, "\n");
601 /* Dumps information about induction variable candidate CAND to FILE. */
604 dump_cand (FILE *file
, struct iv_cand
*cand
)
606 struct iv
*iv
= cand
->iv
;
608 fprintf (file
, "candidate %d%s\n",
609 cand
->id
, cand
->important
? " (important)" : "");
611 if (cand
->depends_on
)
613 fprintf (file
, " depends on ");
614 dump_bitmap (file
, cand
->depends_on
);
619 fprintf (file
, " final value replacement\n");
623 if (cand
->var_before
)
625 fprintf (file
, " var_before ");
626 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
627 fprintf (file
, "\n");
631 fprintf (file
, " var_after ");
632 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
633 fprintf (file
, "\n");
639 fprintf (file
, " incremented before exit test\n");
643 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
647 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
651 fprintf (file
, " incremented at end\n");
655 fprintf (file
, " original biv\n");
659 dump_iv (file
, iv
, false);
662 /* Returns the info for ssa version VER. */
664 static inline struct version_info
*
665 ver_info (struct ivopts_data
*data
, unsigned ver
)
667 return data
->version_info
+ ver
;
670 /* Returns the info for ssa name NAME. */
672 static inline struct version_info
*
673 name_info (struct ivopts_data
*data
, tree name
)
675 return ver_info (data
, SSA_NAME_VERSION (name
));
678 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
682 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
684 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
688 if (sbb
== loop
->latch
)
694 return stmt
== last_stmt (bb
);
697 /* Returns true if STMT if after the place where the original induction
698 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
699 if the positions are identical. */
702 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
704 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
705 basic_block stmt_bb
= gimple_bb (stmt
);
707 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
710 if (stmt_bb
!= cand_bb
)
714 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
716 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
719 /* Returns true if STMT if after the place where the induction variable
720 CAND is incremented in LOOP. */
723 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
731 return stmt_after_ip_normal_pos (loop
, stmt
);
735 return stmt_after_inc_pos (cand
, stmt
, false);
738 return stmt_after_inc_pos (cand
, stmt
, true);
745 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
748 abnormal_ssa_name_p (tree exp
)
753 if (TREE_CODE (exp
) != SSA_NAME
)
756 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
759 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
760 abnormal phi node. Callback for for_each_index. */
763 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
764 void *data ATTRIBUTE_UNUSED
)
766 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
768 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
770 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
774 return !abnormal_ssa_name_p (*index
);
777 /* Returns true if EXPR contains a ssa name that occurs in an
778 abnormal phi node. */
781 contains_abnormal_ssa_name_p (tree expr
)
784 enum tree_code_class codeclass
;
789 code
= TREE_CODE (expr
);
790 codeclass
= TREE_CODE_CLASS (code
);
792 if (code
== SSA_NAME
)
793 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
795 if (code
== INTEGER_CST
796 || is_gimple_min_invariant (expr
))
799 if (code
== ADDR_EXPR
)
800 return !for_each_index (&TREE_OPERAND (expr
, 0),
801 idx_contains_abnormal_ssa_name_p
,
804 if (code
== COND_EXPR
)
805 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
806 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
813 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
818 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
830 /* Returns the structure describing number of iterations determined from
831 EXIT of DATA->current_loop, or NULL if something goes wrong. */
833 static struct tree_niter_desc
*
834 niter_for_exit (struct ivopts_data
*data
, edge exit
)
836 struct tree_niter_desc
*desc
;
837 tree_niter_desc
**slot
;
841 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
845 slot
= data
->niters
->get (exit
);
849 /* Try to determine number of iterations. We cannot safely work with ssa
850 names that appear in phi nodes on abnormal edges, so that we do not
851 create overlapping life ranges for them (PR 27283). */
852 desc
= XNEW (struct tree_niter_desc
);
853 if (!number_of_iterations_exit (data
->current_loop
,
855 || contains_abnormal_ssa_name_p (desc
->niter
))
860 data
->niters
->put (exit
, desc
);
868 /* Returns the structure describing number of iterations determined from
869 single dominating exit of DATA->current_loop, or NULL if something
872 static struct tree_niter_desc
*
873 niter_for_single_dom_exit (struct ivopts_data
*data
)
875 edge exit
= single_dom_exit (data
->current_loop
);
880 return niter_for_exit (data
, exit
);
883 /* Initializes data structures used by the iv optimization pass, stored
887 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
889 data
->version_info_size
= 2 * num_ssa_names
;
890 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
891 data
->relevant
= BITMAP_ALLOC (NULL
);
892 data
->important_candidates
= BITMAP_ALLOC (NULL
);
893 data
->max_inv_id
= 0;
895 data
->iv_uses
.create (20);
896 data
->iv_candidates
.create (20);
897 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
898 data
->inv_expr_id
= 0;
899 data
->name_expansion_cache
= NULL
;
900 decl_rtl_to_reset
.create (20);
901 gcc_obstack_init (&data
->iv_obstack
);
904 /* Returns a memory object to that EXPR points. In case we are able to
905 determine that it does not point to any such object, NULL is returned. */
908 determine_base_object (tree expr
)
910 enum tree_code code
= TREE_CODE (expr
);
913 /* If this is a pointer casted to any type, we need to determine
914 the base object for the pointer; so handle conversions before
915 throwing away non-pointer expressions. */
916 if (CONVERT_EXPR_P (expr
))
917 return determine_base_object (TREE_OPERAND (expr
, 0));
919 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
928 obj
= TREE_OPERAND (expr
, 0);
929 base
= get_base_address (obj
);
934 if (TREE_CODE (base
) == MEM_REF
)
935 return determine_base_object (TREE_OPERAND (base
, 0));
937 return fold_convert (ptr_type_node
,
938 build_fold_addr_expr (base
));
940 case POINTER_PLUS_EXPR
:
941 return determine_base_object (TREE_OPERAND (expr
, 0));
945 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
949 return fold_convert (ptr_type_node
, expr
);
953 /* Return true if address expression with non-DECL_P operand appears
957 contain_complex_addr_expr (tree expr
)
962 switch (TREE_CODE (expr
))
964 case POINTER_PLUS_EXPR
:
967 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
968 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
972 return (!DECL_P (TREE_OPERAND (expr
, 0)));
981 /* Allocates an induction variable with given initial value BASE and step STEP
982 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
985 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
986 bool no_overflow
= false)
989 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
991 gcc_assert (step
!= NULL_TREE
);
993 /* Lower address expression in base except ones with DECL_P as operand.
995 1) More accurate cost can be computed for address expressions;
996 2) Duplicate candidates won't be created for bases in different
997 forms, like &a[0] and &a. */
999 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1000 || contain_complex_addr_expr (expr
))
1003 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1004 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1008 iv
->base_object
= determine_base_object (base
);
1011 iv
->have_use_for
= false;
1013 iv
->ssa_name
= NULL_TREE
;
1014 iv
->no_overflow
= no_overflow
;
1019 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1020 doesn't overflow. */
1023 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1026 struct version_info
*info
= name_info (data
, iv
);
1028 gcc_assert (!info
->iv
);
1030 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1031 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1032 info
->iv
->ssa_name
= iv
;
1035 /* Finds induction variable declaration for VAR. */
1038 get_iv (struct ivopts_data
*data
, tree var
)
1041 tree type
= TREE_TYPE (var
);
1043 if (!POINTER_TYPE_P (type
)
1044 && !INTEGRAL_TYPE_P (type
))
1047 if (!name_info (data
, var
)->iv
)
1049 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1052 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1053 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1056 return name_info (data
, var
)->iv
;
1059 /* Return the first non-invariant ssa var found in EXPR. */
1062 extract_single_var_from_expr (tree expr
)
1066 enum tree_code code
;
1068 if (!expr
|| is_gimple_min_invariant (expr
))
1071 code
= TREE_CODE (expr
);
1072 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1074 n
= TREE_OPERAND_LENGTH (expr
);
1075 for (i
= 0; i
< n
; i
++)
1077 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1083 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1086 /* Finds basic ivs. */
1089 find_bivs (struct ivopts_data
*data
)
1093 tree step
, type
, base
, stop
;
1095 struct loop
*loop
= data
->current_loop
;
1098 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1102 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1105 if (virtual_operand_p (PHI_RESULT (phi
)))
1108 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1111 if (integer_zerop (iv
.step
))
1115 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1116 /* Stop expanding iv base at the first ssa var referred by iv step.
1117 Ideally we should stop at any ssa var, because that's expensive
1118 and unusual to happen, we just do it on the first one.
1120 See PR64705 for the rationale. */
1121 stop
= extract_single_var_from_expr (step
);
1122 base
= expand_simple_operations (base
, stop
);
1123 if (contains_abnormal_ssa_name_p (base
)
1124 || contains_abnormal_ssa_name_p (step
))
1127 type
= TREE_TYPE (PHI_RESULT (phi
));
1128 base
= fold_convert (type
, base
);
1131 if (POINTER_TYPE_P (type
))
1132 step
= convert_to_ptrofftype (step
);
1134 step
= fold_convert (type
, step
);
1137 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1144 /* Marks basic ivs. */
1147 mark_bivs (struct ivopts_data
*data
)
1152 struct iv
*iv
, *incr_iv
;
1153 struct loop
*loop
= data
->current_loop
;
1154 basic_block incr_bb
;
1157 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1161 iv
= get_iv (data
, PHI_RESULT (phi
));
1165 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1166 def
= SSA_NAME_DEF_STMT (var
);
1167 /* Don't mark iv peeled from other one as biv. */
1169 && gimple_code (def
) == GIMPLE_PHI
1170 && gimple_bb (def
) == loop
->header
)
1173 incr_iv
= get_iv (data
, var
);
1177 /* If the increment is in the subloop, ignore it. */
1178 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1179 if (incr_bb
->loop_father
!= data
->current_loop
1180 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1184 incr_iv
->biv_p
= true;
1188 /* Checks whether STMT defines a linear induction variable and stores its
1189 parameters to IV. */
1192 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1195 struct loop
*loop
= data
->current_loop
;
1197 iv
->base
= NULL_TREE
;
1198 iv
->step
= NULL_TREE
;
1200 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1203 lhs
= gimple_assign_lhs (stmt
);
1204 if (TREE_CODE (lhs
) != SSA_NAME
)
1207 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1210 /* Stop expanding iv base at the first ssa var referred by iv step.
1211 Ideally we should stop at any ssa var, because that's expensive
1212 and unusual to happen, we just do it on the first one.
1214 See PR64705 for the rationale. */
1215 stop
= extract_single_var_from_expr (iv
->step
);
1216 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1217 if (contains_abnormal_ssa_name_p (iv
->base
)
1218 || contains_abnormal_ssa_name_p (iv
->step
))
1221 /* If STMT could throw, then do not consider STMT as defining a GIV.
1222 While this will suppress optimizations, we can not safely delete this
1223 GIV and associated statements, even if it appears it is not used. */
1224 if (stmt_could_throw_p (stmt
))
1230 /* Finds general ivs in statement STMT. */
1233 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1237 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1240 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1243 /* Finds general ivs in basic block BB. */
1246 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1248 gimple_stmt_iterator bsi
;
1250 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1251 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1254 /* Finds general ivs. */
1257 find_givs (struct ivopts_data
*data
)
1259 struct loop
*loop
= data
->current_loop
;
1260 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1263 for (i
= 0; i
< loop
->num_nodes
; i
++)
1264 find_givs_in_bb (data
, body
[i
]);
1268 /* For each ssa name defined in LOOP determines whether it is an induction
1269 variable and if so, its initial value and step. */
1272 find_induction_variables (struct ivopts_data
*data
)
1277 if (!find_bivs (data
))
1283 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1285 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1289 fprintf (dump_file
, " number of iterations ");
1290 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1291 if (!integer_zerop (niter
->may_be_zero
))
1293 fprintf (dump_file
, "; zero if ");
1294 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1296 fprintf (dump_file
, "\n\n");
1299 fprintf (dump_file
, "Induction variables:\n\n");
1301 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1303 if (ver_info (data
, i
)->iv
)
1304 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1311 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1312 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1313 is the const offset stripped from IV base. For uses of other types,
1314 ADDR_BASE and ADDR_OFFSET are zero by default. */
1316 static struct iv_use
*
1317 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1318 gimple stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1319 unsigned HOST_WIDE_INT addr_offset
= 0)
1321 struct iv_use
*use
= XCNEW (struct iv_use
);
1323 use
->id
= n_iv_uses (data
);
1325 use
->type
= use_type
;
1329 use
->related_cands
= BITMAP_ALLOC (NULL
);
1331 use
->addr_base
= addr_base
;
1332 use
->addr_offset
= addr_offset
;
1334 data
->iv_uses
.safe_push (use
);
1339 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1340 The sub use is recorded under the one whose use id is ID_GROUP. */
1342 static struct iv_use
*
1343 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1344 struct iv
*iv
, gimple stmt
, enum use_type use_type
,
1345 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1346 unsigned int id_group
)
1348 struct iv_use
*use
= XCNEW (struct iv_use
);
1349 struct iv_use
*group
= iv_use (data
, id_group
);
1351 use
->id
= group
->id
;
1353 use
->type
= use_type
;
1357 use
->related_cands
= NULL
;
1358 use
->addr_base
= addr_base
;
1359 use
->addr_offset
= addr_offset
;
1361 /* Sub use list is maintained in offset ascending order. */
1362 if (addr_offset
<= group
->addr_offset
)
1364 use
->related_cands
= group
->related_cands
;
1365 group
->related_cands
= NULL
;
1367 data
->iv_uses
[id_group
] = use
;
1375 group
= group
->next
;
1377 while (group
&& addr_offset
> group
->addr_offset
);
1378 use
->next
= pre
->next
;
1385 /* Checks whether OP is a loop-level invariant and if so, records it.
1386 NONLINEAR_USE is true if the invariant is used in a way we do not
1387 handle specially. */
1390 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1393 struct version_info
*info
;
1395 if (TREE_CODE (op
) != SSA_NAME
1396 || virtual_operand_p (op
))
1399 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1401 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1404 info
= name_info (data
, op
);
1406 info
->has_nonlin_use
|= nonlinear_use
;
1408 info
->inv_id
= ++data
->max_inv_id
;
1409 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1412 /* Checks whether the use OP is interesting and if so, records it. */
1414 static struct iv_use
*
1415 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1421 if (TREE_CODE (op
) != SSA_NAME
)
1424 iv
= get_iv (data
, op
);
1428 if (iv
->have_use_for
)
1430 use
= iv_use (data
, iv
->use_id
);
1432 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1436 if (integer_zerop (iv
->step
))
1438 record_invariant (data
, op
, true);
1441 iv
->have_use_for
= true;
1443 stmt
= SSA_NAME_DEF_STMT (op
);
1444 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1445 || is_gimple_assign (stmt
));
1447 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1448 iv
->use_id
= use
->id
;
1453 /* Given a condition in statement STMT, checks whether it is a compare
1454 of an induction variable and an invariant. If this is the case,
1455 CONTROL_VAR is set to location of the iv, BOUND to the location of
1456 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1457 induction variable descriptions, and true is returned. If this is not
1458 the case, CONTROL_VAR and BOUND are set to the arguments of the
1459 condition and false is returned. */
1462 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1463 tree
**control_var
, tree
**bound
,
1464 struct iv
**iv_var
, struct iv
**iv_bound
)
1466 /* The objects returned when COND has constant operands. */
1467 static struct iv const_iv
;
1469 tree
*op0
= &zero
, *op1
= &zero
;
1470 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1473 if (gimple_code (stmt
) == GIMPLE_COND
)
1475 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1476 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1477 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1481 op0
= gimple_assign_rhs1_ptr (stmt
);
1482 op1
= gimple_assign_rhs2_ptr (stmt
);
1485 zero
= integer_zero_node
;
1486 const_iv
.step
= integer_zero_node
;
1488 if (TREE_CODE (*op0
) == SSA_NAME
)
1489 iv0
= get_iv (data
, *op0
);
1490 if (TREE_CODE (*op1
) == SSA_NAME
)
1491 iv1
= get_iv (data
, *op1
);
1493 /* Exactly one of the compared values must be an iv, and the other one must
1498 if (integer_zerop (iv0
->step
))
1500 /* Control variable may be on the other side. */
1501 std::swap (op0
, op1
);
1502 std::swap (iv0
, iv1
);
1504 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1519 /* Checks whether the condition in STMT is interesting and if so,
1523 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1525 tree
*var_p
, *bound_p
;
1528 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1530 find_interesting_uses_op (data
, *var_p
);
1531 find_interesting_uses_op (data
, *bound_p
);
1535 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1538 /* Returns the outermost loop EXPR is obviously invariant in
1539 relative to the loop LOOP, i.e. if all its operands are defined
1540 outside of the returned loop. Returns NULL if EXPR is not
1541 even obviously invariant in LOOP. */
1544 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1549 if (is_gimple_min_invariant (expr
))
1550 return current_loops
->tree_root
;
1552 if (TREE_CODE (expr
) == SSA_NAME
)
1554 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1557 if (flow_bb_inside_loop_p (loop
, def_bb
))
1559 return superloop_at_depth (loop
,
1560 loop_depth (def_bb
->loop_father
) + 1);
1563 return current_loops
->tree_root
;
1569 unsigned maxdepth
= 0;
1570 len
= TREE_OPERAND_LENGTH (expr
);
1571 for (i
= 0; i
< len
; i
++)
1573 struct loop
*ivloop
;
1574 if (!TREE_OPERAND (expr
, i
))
1577 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1580 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1583 return superloop_at_depth (loop
, maxdepth
);
1586 /* Returns true if expression EXPR is obviously invariant in LOOP,
1587 i.e. if all its operands are defined outside of the LOOP. LOOP
1588 should not be the function body. */
1591 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1596 gcc_assert (loop_depth (loop
) > 0);
1598 if (is_gimple_min_invariant (expr
))
1601 if (TREE_CODE (expr
) == SSA_NAME
)
1603 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1605 && flow_bb_inside_loop_p (loop
, def_bb
))
1614 len
= TREE_OPERAND_LENGTH (expr
);
1615 for (i
= 0; i
< len
; i
++)
1616 if (TREE_OPERAND (expr
, i
)
1617 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1623 /* Cumulates the steps of indices into DATA and replaces their values with the
1624 initial ones. Returns false when the value of the index cannot be determined.
1625 Callback for for_each_index. */
1627 struct ifs_ivopts_data
1629 struct ivopts_data
*ivopts_data
;
1635 idx_find_step (tree base
, tree
*idx
, void *data
)
1637 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1639 bool use_overflow_semantics
= false;
1640 tree step
, iv_base
, iv_step
, lbound
, off
;
1641 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1643 /* If base is a component ref, require that the offset of the reference
1645 if (TREE_CODE (base
) == COMPONENT_REF
)
1647 off
= component_ref_field_offset (base
);
1648 return expr_invariant_in_loop_p (loop
, off
);
1651 /* If base is array, first check whether we will be able to move the
1652 reference out of the loop (in order to take its address in strength
1653 reduction). In order for this to work we need both lower bound
1654 and step to be loop invariants. */
1655 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1657 /* Moreover, for a range, the size needs to be invariant as well. */
1658 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1659 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1662 step
= array_ref_element_size (base
);
1663 lbound
= array_ref_low_bound (base
);
1665 if (!expr_invariant_in_loop_p (loop
, step
)
1666 || !expr_invariant_in_loop_p (loop
, lbound
))
1670 if (TREE_CODE (*idx
) != SSA_NAME
)
1673 iv
= get_iv (dta
->ivopts_data
, *idx
);
1677 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1678 *&x[0], which is not folded and does not trigger the
1679 ARRAY_REF path below. */
1682 if (integer_zerop (iv
->step
))
1685 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1687 step
= array_ref_element_size (base
);
1689 /* We only handle addresses whose step is an integer constant. */
1690 if (TREE_CODE (step
) != INTEGER_CST
)
1694 /* The step for pointer arithmetics already is 1 byte. */
1695 step
= size_one_node
;
1699 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1700 use_overflow_semantics
= true;
1702 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1703 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1704 use_overflow_semantics
))
1706 /* The index might wrap. */
1710 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1711 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1716 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1717 object is passed to it in DATA. */
1720 idx_record_use (tree base
, tree
*idx
,
1723 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1724 find_interesting_uses_op (data
, *idx
);
1725 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1727 find_interesting_uses_op (data
, array_ref_element_size (base
));
1728 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1733 /* If we can prove that TOP = cst * BOT for some constant cst,
1734 store cst to MUL and return true. Otherwise return false.
1735 The returned value is always sign-extended, regardless of the
1736 signedness of TOP and BOT. */
1739 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1742 enum tree_code code
;
1743 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1744 widest_int res
, p0
, p1
;
1749 if (operand_equal_p (top
, bot
, 0))
1755 code
= TREE_CODE (top
);
1759 mby
= TREE_OPERAND (top
, 1);
1760 if (TREE_CODE (mby
) != INTEGER_CST
)
1763 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1766 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1771 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1772 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1775 if (code
== MINUS_EXPR
)
1777 *mul
= wi::sext (p0
+ p1
, precision
);
1781 if (TREE_CODE (bot
) != INTEGER_CST
)
1784 p0
= widest_int::from (top
, SIGNED
);
1785 p1
= widest_int::from (bot
, SIGNED
);
1788 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1796 /* Return true if memory reference REF with step STEP may be unaligned. */
1799 may_be_unaligned_p (tree ref
, tree step
)
1801 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1802 thus they are not misaligned. */
1803 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1806 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1807 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1808 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1810 unsigned HOST_WIDE_INT bitpos
;
1811 unsigned int ref_align
;
1812 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1813 if (ref_align
< align
1814 || (bitpos
% align
) != 0
1815 || (bitpos
% BITS_PER_UNIT
) != 0)
1818 unsigned int trailing_zeros
= tree_ctz (step
);
1819 if (trailing_zeros
< HOST_BITS_PER_INT
1820 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1826 /* Return true if EXPR may be non-addressable. */
1829 may_be_nonaddressable_p (tree expr
)
1831 switch (TREE_CODE (expr
))
1833 case TARGET_MEM_REF
:
1834 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1835 target, thus they are always addressable. */
1839 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1840 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1842 case VIEW_CONVERT_EXPR
:
1843 /* This kind of view-conversions may wrap non-addressable objects
1844 and make them look addressable. After some processing the
1845 non-addressability may be uncovered again, causing ADDR_EXPRs
1846 of inappropriate objects to be built. */
1847 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1848 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1851 /* ... fall through ... */
1854 case ARRAY_RANGE_REF
:
1855 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1868 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
1870 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1871 If there is an existing use which has same stripped iv base and step,
1872 this function records this one as a sub use to that; otherwise records
1873 it as a normal one. */
1875 static struct iv_use
*
1876 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1877 struct iv
*iv
, gimple stmt
, enum use_type use_type
)
1882 unsigned HOST_WIDE_INT addr_offset
;
1884 /* Only support sub use for address type uses, that is, with base
1886 if (!iv
->base_object
)
1887 return record_use (data
, use_p
, iv
, stmt
, use_type
);
1889 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1890 for (i
= 0; i
< n_iv_uses (data
); i
++)
1892 use
= iv_use (data
, i
);
1893 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
1896 /* Check if it has the same stripped base and step. */
1897 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1898 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1899 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1903 if (i
== n_iv_uses (data
))
1904 return record_use (data
, use_p
, iv
, stmt
,
1905 use_type
, addr_base
, addr_offset
);
1907 return record_sub_use (data
, use_p
, iv
, stmt
,
1908 use_type
, addr_base
, addr_offset
, i
);
1911 /* Finds addresses in *OP_P inside STMT. */
1914 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1916 tree base
= *op_p
, step
= size_zero_node
;
1918 struct ifs_ivopts_data ifs_ivopts_data
;
1920 /* Do not play with volatile memory references. A bit too conservative,
1921 perhaps, but safe. */
1922 if (gimple_has_volatile_ops (stmt
))
1925 /* Ignore bitfields for now. Not really something terribly complicated
1927 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1930 base
= unshare_expr (base
);
1932 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1934 tree type
= build_pointer_type (TREE_TYPE (base
));
1938 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1940 civ
= get_iv (data
, TMR_BASE (base
));
1944 TMR_BASE (base
) = civ
->base
;
1947 if (TMR_INDEX2 (base
)
1948 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1950 civ
= get_iv (data
, TMR_INDEX2 (base
));
1954 TMR_INDEX2 (base
) = civ
->base
;
1957 if (TMR_INDEX (base
)
1958 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1960 civ
= get_iv (data
, TMR_INDEX (base
));
1964 TMR_INDEX (base
) = civ
->base
;
1969 if (TMR_STEP (base
))
1970 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1972 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1976 if (integer_zerop (step
))
1978 base
= tree_mem_ref_addr (type
, base
);
1982 ifs_ivopts_data
.ivopts_data
= data
;
1983 ifs_ivopts_data
.stmt
= stmt
;
1984 ifs_ivopts_data
.step
= size_zero_node
;
1985 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1986 || integer_zerop (ifs_ivopts_data
.step
))
1988 step
= ifs_ivopts_data
.step
;
1990 /* Check that the base expression is addressable. This needs
1991 to be done after substituting bases of IVs into it. */
1992 if (may_be_nonaddressable_p (base
))
1995 /* Moreover, on strict alignment platforms, check that it is
1996 sufficiently aligned. */
1997 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2000 base
= build_fold_addr_expr (base
);
2002 /* Substituting bases of IVs into the base expression might
2003 have caused folding opportunities. */
2004 if (TREE_CODE (base
) == ADDR_EXPR
)
2006 tree
*ref
= &TREE_OPERAND (base
, 0);
2007 while (handled_component_p (*ref
))
2008 ref
= &TREE_OPERAND (*ref
, 0);
2009 if (TREE_CODE (*ref
) == MEM_REF
)
2011 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2012 TREE_OPERAND (*ref
, 0),
2013 TREE_OPERAND (*ref
, 1));
2020 civ
= alloc_iv (data
, base
, step
);
2021 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2025 for_each_index (op_p
, idx_record_use
, data
);
2028 /* Finds and records invariants used in STMT. */
2031 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
2034 use_operand_p use_p
;
2037 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2039 op
= USE_FROM_PTR (use_p
);
2040 record_invariant (data
, op
, false);
2044 /* Finds interesting uses of induction variables in the statement STMT. */
2047 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
2050 tree op
, *lhs
, *rhs
;
2052 use_operand_p use_p
;
2053 enum tree_code code
;
2055 find_invariants_stmt (data
, stmt
);
2057 if (gimple_code (stmt
) == GIMPLE_COND
)
2059 find_interesting_uses_cond (data
, stmt
);
2063 if (is_gimple_assign (stmt
))
2065 lhs
= gimple_assign_lhs_ptr (stmt
);
2066 rhs
= gimple_assign_rhs1_ptr (stmt
);
2068 if (TREE_CODE (*lhs
) == SSA_NAME
)
2070 /* If the statement defines an induction variable, the uses are not
2071 interesting by themselves. */
2073 iv
= get_iv (data
, *lhs
);
2075 if (iv
&& !integer_zerop (iv
->step
))
2079 code
= gimple_assign_rhs_code (stmt
);
2080 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2081 && (REFERENCE_CLASS_P (*rhs
)
2082 || is_gimple_val (*rhs
)))
2084 if (REFERENCE_CLASS_P (*rhs
))
2085 find_interesting_uses_address (data
, stmt
, rhs
);
2087 find_interesting_uses_op (data
, *rhs
);
2089 if (REFERENCE_CLASS_P (*lhs
))
2090 find_interesting_uses_address (data
, stmt
, lhs
);
2093 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2095 find_interesting_uses_cond (data
, stmt
);
2099 /* TODO -- we should also handle address uses of type
2101 memory = call (whatever);
2108 if (gimple_code (stmt
) == GIMPLE_PHI
2109 && gimple_bb (stmt
) == data
->current_loop
->header
)
2111 iv
= get_iv (data
, PHI_RESULT (stmt
));
2113 if (iv
&& !integer_zerop (iv
->step
))
2117 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2119 op
= USE_FROM_PTR (use_p
);
2121 if (TREE_CODE (op
) != SSA_NAME
)
2124 iv
= get_iv (data
, op
);
2128 find_interesting_uses_op (data
, op
);
2132 /* Finds interesting uses of induction variables outside of loops
2133 on loop exit edge EXIT. */
2136 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2142 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2145 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2146 if (!virtual_operand_p (def
))
2147 find_interesting_uses_op (data
, def
);
2151 /* Finds uses of the induction variables that are interesting. */
2154 find_interesting_uses (struct ivopts_data
*data
)
2157 gimple_stmt_iterator bsi
;
2158 basic_block
*body
= get_loop_body (data
->current_loop
);
2160 struct version_info
*info
;
2163 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2164 fprintf (dump_file
, "Uses:\n\n");
2166 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2171 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2172 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2173 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2174 find_interesting_uses_outside (data
, e
);
2176 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2177 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2178 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2179 if (!is_gimple_debug (gsi_stmt (bsi
)))
2180 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2183 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2187 fprintf (dump_file
, "\n");
2189 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2191 info
= ver_info (data
, i
);
2194 fprintf (dump_file
, " ");
2195 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2196 fprintf (dump_file
, " is invariant (%d)%s\n",
2197 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2201 fprintf (dump_file
, "\n");
2207 /* Compute maximum offset of [base + offset] addressing mode
2208 for memory reference represented by USE. */
2210 static HOST_WIDE_INT
2211 compute_max_addr_offset (struct iv_use
*use
)
2215 HOST_WIDE_INT i
, off
;
2216 unsigned list_index
, num
;
2218 machine_mode mem_mode
, addr_mode
;
2219 static vec
<HOST_WIDE_INT
> max_offset_list
;
2221 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2222 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2224 num
= max_offset_list
.length ();
2225 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2226 if (list_index
>= num
)
2228 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2229 for (; num
< max_offset_list
.length (); num
++)
2230 max_offset_list
[num
] = -1;
2233 off
= max_offset_list
[list_index
];
2237 addr_mode
= targetm
.addr_space
.address_mode (as
);
2238 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2239 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2241 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2242 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2243 width
= HOST_BITS_PER_WIDE_INT
- 1;
2245 for (i
= width
; i
> 0; i
--)
2247 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2248 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2249 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2252 /* For some strict-alignment targets, the offset must be naturally
2253 aligned. Try an aligned offset if mem_mode is not QImode. */
2254 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2255 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2257 off
-= GET_MODE_SIZE (mem_mode
);
2258 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2259 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2266 max_offset_list
[list_index
] = off
;
2270 /* Check if all small groups should be split. Return true if and
2273 1) At least one groups contain two uses with different offsets.
2274 2) No group contains more than two uses with different offsets.
2276 Return false otherwise. We want to split such groups because:
2278 1) Small groups don't have much benefit and may interfer with
2279 general candidate selection.
2280 2) Size for problem with only small groups is usually small and
2281 general algorithm can handle it well.
2283 TODO -- Above claim may not hold when auto increment is supported. */
2286 split_all_small_groups (struct ivopts_data
*data
)
2288 bool split_p
= false;
2289 unsigned int i
, n
, distinct
;
2290 struct iv_use
*pre
, *use
;
2292 n
= n_iv_uses (data
);
2293 for (i
= 0; i
< n
; i
++)
2295 use
= iv_use (data
, i
);
2300 gcc_assert (use
->type
== USE_ADDRESS
);
2301 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2303 if (pre
->addr_offset
!= use
->addr_offset
)
2316 /* For each group of address type uses, this function further groups
2317 these uses according to the maximum offset supported by target's
2318 [base + offset] addressing mode. */
2321 group_address_uses (struct ivopts_data
*data
)
2323 HOST_WIDE_INT max_offset
= -1;
2324 unsigned int i
, n
, sub_id
;
2325 struct iv_use
*pre
, *use
;
2326 unsigned HOST_WIDE_INT addr_offset_first
;
2328 /* Reset max offset to split all small groups. */
2329 if (split_all_small_groups (data
))
2332 n
= n_iv_uses (data
);
2333 for (i
= 0; i
< n
; i
++)
2335 use
= iv_use (data
, i
);
2339 gcc_assert (use
->type
== USE_ADDRESS
);
2340 if (max_offset
!= 0)
2341 max_offset
= compute_max_addr_offset (use
);
2346 addr_offset_first
= use
->addr_offset
;
2347 /* Only uses with offset that can fit in offset part against
2348 the first use can be grouped together. */
2349 for (pre
= use
, use
= use
->next
;
2350 use
&& (use
->addr_offset
- addr_offset_first
2351 <= (unsigned HOST_WIDE_INT
) max_offset
);
2352 pre
= use
, use
= use
->next
)
2355 use
->sub_id
= ++sub_id
;
2358 /* Break the list and create new group. */
2362 use
->id
= n_iv_uses (data
);
2363 use
->related_cands
= BITMAP_ALLOC (NULL
);
2364 data
->iv_uses
.safe_push (use
);
2369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2370 dump_uses (dump_file
, data
);
2373 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2374 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2375 we are at the top-level of the processed address. */
2378 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2379 HOST_WIDE_INT
*offset
)
2381 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2382 enum tree_code code
;
2383 tree type
, orig_type
= TREE_TYPE (expr
);
2384 HOST_WIDE_INT off0
, off1
, st
;
2385 tree orig_expr
= expr
;
2389 type
= TREE_TYPE (expr
);
2390 code
= TREE_CODE (expr
);
2396 if (!cst_and_fits_in_hwi (expr
)
2397 || integer_zerop (expr
))
2400 *offset
= int_cst_value (expr
);
2401 return build_int_cst (orig_type
, 0);
2403 case POINTER_PLUS_EXPR
:
2406 op0
= TREE_OPERAND (expr
, 0);
2407 op1
= TREE_OPERAND (expr
, 1);
2409 op0
= strip_offset_1 (op0
, false, false, &off0
);
2410 op1
= strip_offset_1 (op1
, false, false, &off1
);
2412 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2413 if (op0
== TREE_OPERAND (expr
, 0)
2414 && op1
== TREE_OPERAND (expr
, 1))
2417 if (integer_zerop (op1
))
2419 else if (integer_zerop (op0
))
2421 if (code
== MINUS_EXPR
)
2422 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2427 expr
= fold_build2 (code
, type
, op0
, op1
);
2429 return fold_convert (orig_type
, expr
);
2432 op1
= TREE_OPERAND (expr
, 1);
2433 if (!cst_and_fits_in_hwi (op1
))
2436 op0
= TREE_OPERAND (expr
, 0);
2437 op0
= strip_offset_1 (op0
, false, false, &off0
);
2438 if (op0
== TREE_OPERAND (expr
, 0))
2441 *offset
= off0
* int_cst_value (op1
);
2442 if (integer_zerop (op0
))
2445 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2447 return fold_convert (orig_type
, expr
);
2450 case ARRAY_RANGE_REF
:
2454 step
= array_ref_element_size (expr
);
2455 if (!cst_and_fits_in_hwi (step
))
2458 st
= int_cst_value (step
);
2459 op1
= TREE_OPERAND (expr
, 1);
2460 op1
= strip_offset_1 (op1
, false, false, &off1
);
2461 *offset
= off1
* st
;
2464 && integer_zerop (op1
))
2466 /* Strip the component reference completely. */
2467 op0
= TREE_OPERAND (expr
, 0);
2468 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2481 tmp
= component_ref_field_offset (expr
);
2482 field
= TREE_OPERAND (expr
, 1);
2484 && cst_and_fits_in_hwi (tmp
)
2485 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2487 HOST_WIDE_INT boffset
, abs_off
;
2489 /* Strip the component reference completely. */
2490 op0
= TREE_OPERAND (expr
, 0);
2491 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2492 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2493 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2497 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2504 op0
= TREE_OPERAND (expr
, 0);
2505 op0
= strip_offset_1 (op0
, true, true, &off0
);
2508 if (op0
== TREE_OPERAND (expr
, 0))
2511 expr
= build_fold_addr_expr (op0
);
2512 return fold_convert (orig_type
, expr
);
2515 /* ??? Offset operand? */
2516 inside_addr
= false;
2523 /* Default handling of expressions for that we want to recurse into
2524 the first operand. */
2525 op0
= TREE_OPERAND (expr
, 0);
2526 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2529 if (op0
== TREE_OPERAND (expr
, 0)
2530 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2533 expr
= copy_node (expr
);
2534 TREE_OPERAND (expr
, 0) = op0
;
2536 TREE_OPERAND (expr
, 1) = op1
;
2538 /* Inside address, we might strip the top level component references,
2539 thus changing type of the expression. Handling of ADDR_EXPR
2541 expr
= fold_convert (orig_type
, expr
);
2546 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2549 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2552 tree core
= strip_offset_1 (expr
, false, false, &off
);
2557 /* Returns variant of TYPE that can be used as base for different uses.
2558 We return unsigned type with the same precision, which avoids problems
2562 generic_type_for (tree type
)
2564 if (POINTER_TYPE_P (type
))
2565 return unsigned_type_for (type
);
2567 if (TYPE_UNSIGNED (type
))
2570 return unsigned_type_for (type
);
2573 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2574 the bitmap to that we should store it. */
2576 static struct ivopts_data
*fd_ivopts_data
;
2578 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2580 bitmap
*depends_on
= (bitmap
*) data
;
2581 struct version_info
*info
;
2583 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2585 info
= name_info (fd_ivopts_data
, *expr_p
);
2587 if (!info
->inv_id
|| info
->has_nonlin_use
)
2591 *depends_on
= BITMAP_ALLOC (NULL
);
2592 bitmap_set_bit (*depends_on
, info
->inv_id
);
2597 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2598 position to POS. If USE is not NULL, the candidate is set as related to
2599 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2600 replacement of the final value of the iv by a direct computation. */
2602 static struct iv_cand
*
2603 add_candidate_1 (struct ivopts_data
*data
,
2604 tree base
, tree step
, bool important
, enum iv_position pos
,
2605 struct iv_use
*use
, gimple incremented_at
)
2608 struct iv_cand
*cand
= NULL
;
2609 tree type
, orig_type
;
2611 /* For non-original variables, make sure their values are computed in a type
2612 that does not invoke undefined behavior on overflows (since in general,
2613 we cannot prove that these induction variables are non-wrapping). */
2614 if (pos
!= IP_ORIGINAL
)
2616 orig_type
= TREE_TYPE (base
);
2617 type
= generic_type_for (orig_type
);
2618 if (type
!= orig_type
)
2620 base
= fold_convert (type
, base
);
2621 step
= fold_convert (type
, step
);
2625 for (i
= 0; i
< n_iv_cands (data
); i
++)
2627 cand
= iv_cand (data
, i
);
2629 if (cand
->pos
!= pos
)
2632 if (cand
->incremented_at
!= incremented_at
2633 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2634 && cand
->ainc_use
!= use
))
2648 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2649 && operand_equal_p (step
, cand
->iv
->step
, 0)
2650 && (TYPE_PRECISION (TREE_TYPE (base
))
2651 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2655 if (i
== n_iv_cands (data
))
2657 cand
= XCNEW (struct iv_cand
);
2663 cand
->iv
= alloc_iv (data
, base
, step
);
2666 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2668 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2669 cand
->var_after
= cand
->var_before
;
2671 cand
->important
= important
;
2672 cand
->incremented_at
= incremented_at
;
2673 data
->iv_candidates
.safe_push (cand
);
2676 && TREE_CODE (step
) != INTEGER_CST
)
2678 fd_ivopts_data
= data
;
2679 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2682 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2683 cand
->ainc_use
= use
;
2685 cand
->ainc_use
= NULL
;
2687 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2688 dump_cand (dump_file
, cand
);
2691 if (important
&& !cand
->important
)
2693 cand
->important
= true;
2694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2695 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2700 bitmap_set_bit (use
->related_cands
, i
);
2701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2702 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2709 /* Returns true if incrementing the induction variable at the end of the LOOP
2712 The purpose is to avoid splitting latch edge with a biv increment, thus
2713 creating a jump, possibly confusing other optimization passes and leaving
2714 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2715 is not available (so we do not have a better alternative), or if the latch
2716 edge is already nonempty. */
2719 allow_ip_end_pos_p (struct loop
*loop
)
2721 if (!ip_normal_pos (loop
))
2724 if (!empty_block_p (ip_end_pos (loop
)))
2730 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2731 Important field is set to IMPORTANT. */
2734 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2735 bool important
, struct iv_use
*use
)
2737 basic_block use_bb
= gimple_bb (use
->stmt
);
2738 machine_mode mem_mode
;
2739 unsigned HOST_WIDE_INT cstepi
;
2741 /* If we insert the increment in any position other than the standard
2742 ones, we must ensure that it is incremented once per iteration.
2743 It must not be in an inner nested loop, or one side of an if
2745 if (use_bb
->loop_father
!= data
->current_loop
2746 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2747 || stmt_could_throw_p (use
->stmt
)
2748 || !cst_and_fits_in_hwi (step
))
2751 cstepi
= int_cst_value (step
);
2753 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2754 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2755 || USE_STORE_PRE_INCREMENT (mem_mode
))
2756 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2757 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2758 || USE_STORE_PRE_DECREMENT (mem_mode
))
2759 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2761 enum tree_code code
= MINUS_EXPR
;
2763 tree new_step
= step
;
2765 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2767 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2768 code
= POINTER_PLUS_EXPR
;
2771 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2772 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2773 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2776 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2777 || USE_STORE_POST_INCREMENT (mem_mode
))
2778 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2779 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2780 || USE_STORE_POST_DECREMENT (mem_mode
))
2781 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2783 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2788 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2789 position to POS. If USE is not NULL, the candidate is set as related to
2790 it. The candidate computation is scheduled on all available positions. */
2793 add_candidate (struct ivopts_data
*data
,
2794 tree base
, tree step
, bool important
, struct iv_use
*use
)
2796 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2798 if (ip_normal_pos (data
->current_loop
))
2799 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2800 if (ip_end_pos (data
->current_loop
)
2801 && allow_ip_end_pos_p (data
->current_loop
))
2802 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2804 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2805 add_autoinc_candidates (data
, base
, step
, important
, use
);
2808 /* Adds standard iv candidates. */
2811 add_standard_iv_candidates (struct ivopts_data
*data
)
2813 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2815 /* The same for a double-integer type if it is still fast enough. */
2817 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2818 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2819 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2820 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2822 /* The same for a double-integer type if it is still fast enough. */
2824 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2825 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2826 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2827 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2831 /* Adds candidates bases on the old induction variable IV. */
2834 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2838 struct iv_cand
*cand
;
2840 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2842 /* The same, but with initial value zero. */
2843 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2844 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2846 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2847 iv
->step
, true, NULL
);
2849 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2850 if (gimple_code (phi
) == GIMPLE_PHI
)
2852 /* Additionally record the possibility of leaving the original iv
2854 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2855 /* Don't add candidate if it's from another PHI node because
2856 it's an affine iv appearing in the form of PEELED_CHREC. */
2857 phi
= SSA_NAME_DEF_STMT (def
);
2858 if (gimple_code (phi
) != GIMPLE_PHI
)
2860 cand
= add_candidate_1 (data
,
2861 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2862 SSA_NAME_DEF_STMT (def
));
2863 cand
->var_before
= iv
->ssa_name
;
2864 cand
->var_after
= def
;
2867 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2871 /* Adds candidates based on the old induction variables. */
2874 add_old_ivs_candidates (struct ivopts_data
*data
)
2880 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2882 iv
= ver_info (data
, i
)->iv
;
2883 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2884 add_old_iv_candidates (data
, iv
);
2888 /* Adds candidates based on the value of the induction variable IV and USE. */
2891 add_iv_value_candidates (struct ivopts_data
*data
,
2892 struct iv
*iv
, struct iv_use
*use
)
2894 unsigned HOST_WIDE_INT offset
;
2898 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2900 /* The same, but with initial value zero. Make such variable important,
2901 since it is generic enough so that possibly many uses may be based
2903 basetype
= TREE_TYPE (iv
->base
);
2904 if (POINTER_TYPE_P (basetype
))
2905 basetype
= sizetype
;
2906 add_candidate (data
, build_int_cst (basetype
, 0),
2907 iv
->step
, true, use
);
2909 /* Third, try removing the constant offset. Make sure to even
2910 add a candidate for &a[0] vs. (T *)&a. */
2911 base
= strip_offset (iv
->base
, &offset
);
2913 || base
!= iv
->base
)
2914 add_candidate (data
, base
, iv
->step
, false, use
);
2917 /* Adds candidates based on the uses. */
2920 add_derived_ivs_candidates (struct ivopts_data
*data
)
2924 for (i
= 0; i
< n_iv_uses (data
); i
++)
2926 struct iv_use
*use
= iv_use (data
, i
);
2933 case USE_NONLINEAR_EXPR
:
2936 /* Just add the ivs based on the value of the iv used here. */
2937 add_iv_value_candidates (data
, use
->iv
, use
);
2946 /* Record important candidates and add them to related_cands bitmaps
2950 record_important_candidates (struct ivopts_data
*data
)
2955 for (i
= 0; i
< n_iv_cands (data
); i
++)
2957 struct iv_cand
*cand
= iv_cand (data
, i
);
2959 if (cand
->important
)
2960 bitmap_set_bit (data
->important_candidates
, i
);
2963 data
->consider_all_candidates
= (n_iv_cands (data
)
2964 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2966 if (data
->consider_all_candidates
)
2968 /* We will not need "related_cands" bitmaps in this case,
2969 so release them to decrease peak memory consumption. */
2970 for (i
= 0; i
< n_iv_uses (data
); i
++)
2972 use
= iv_use (data
, i
);
2973 BITMAP_FREE (use
->related_cands
);
2978 /* Add important candidates to the related_cands bitmaps. */
2979 for (i
= 0; i
< n_iv_uses (data
); i
++)
2980 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2981 data
->important_candidates
);
2985 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2986 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2987 we allocate a simple list to every use. */
2990 alloc_use_cost_map (struct ivopts_data
*data
)
2992 unsigned i
, size
, s
;
2994 for (i
= 0; i
< n_iv_uses (data
); i
++)
2996 struct iv_use
*use
= iv_use (data
, i
);
2998 if (data
->consider_all_candidates
)
2999 size
= n_iv_cands (data
);
3002 s
= bitmap_count_bits (use
->related_cands
);
3004 /* Round up to the power of two, so that moduling by it is fast. */
3005 size
= s
? (1 << ceil_log2 (s
)) : 1;
3008 use
->n_map_members
= size
;
3009 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3013 /* Returns description of computation cost of expression whose runtime
3014 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3017 new_cost (unsigned runtime
, unsigned complexity
)
3021 cost
.cost
= runtime
;
3022 cost
.complexity
= complexity
;
3027 /* Returns true if COST is infinite. */
3030 infinite_cost_p (comp_cost cost
)
3032 return cost
.cost
== INFTY
;
3035 /* Adds costs COST1 and COST2. */
3038 add_costs (comp_cost cost1
, comp_cost cost2
)
3040 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3041 return infinite_cost
;
3043 cost1
.cost
+= cost2
.cost
;
3044 cost1
.complexity
+= cost2
.complexity
;
3048 /* Subtracts costs COST1 and COST2. */
3051 sub_costs (comp_cost cost1
, comp_cost cost2
)
3053 cost1
.cost
-= cost2
.cost
;
3054 cost1
.complexity
-= cost2
.complexity
;
3059 /* Returns a negative number if COST1 < COST2, a positive number if
3060 COST1 > COST2, and 0 if COST1 = COST2. */
3063 compare_costs (comp_cost cost1
, comp_cost cost2
)
3065 if (cost1
.cost
== cost2
.cost
)
3066 return cost1
.complexity
- cost2
.complexity
;
3068 return cost1
.cost
- cost2
.cost
;
3071 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3072 on invariants DEPENDS_ON and that the value used in expressing it
3073 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3076 set_use_iv_cost (struct ivopts_data
*data
,
3077 struct iv_use
*use
, struct iv_cand
*cand
,
3078 comp_cost cost
, bitmap depends_on
, tree value
,
3079 enum tree_code comp
, int inv_expr_id
)
3083 if (infinite_cost_p (cost
))
3085 BITMAP_FREE (depends_on
);
3089 if (data
->consider_all_candidates
)
3091 use
->cost_map
[cand
->id
].cand
= cand
;
3092 use
->cost_map
[cand
->id
].cost
= cost
;
3093 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3094 use
->cost_map
[cand
->id
].value
= value
;
3095 use
->cost_map
[cand
->id
].comp
= comp
;
3096 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3100 /* n_map_members is a power of two, so this computes modulo. */
3101 s
= cand
->id
& (use
->n_map_members
- 1);
3102 for (i
= s
; i
< use
->n_map_members
; i
++)
3103 if (!use
->cost_map
[i
].cand
)
3105 for (i
= 0; i
< s
; i
++)
3106 if (!use
->cost_map
[i
].cand
)
3112 use
->cost_map
[i
].cand
= cand
;
3113 use
->cost_map
[i
].cost
= cost
;
3114 use
->cost_map
[i
].depends_on
= depends_on
;
3115 use
->cost_map
[i
].value
= value
;
3116 use
->cost_map
[i
].comp
= comp
;
3117 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3120 /* Gets cost of (USE, CANDIDATE) pair. */
3122 static struct cost_pair
*
3123 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3124 struct iv_cand
*cand
)
3127 struct cost_pair
*ret
;
3132 if (data
->consider_all_candidates
)
3134 ret
= use
->cost_map
+ cand
->id
;
3141 /* n_map_members is a power of two, so this computes modulo. */
3142 s
= cand
->id
& (use
->n_map_members
- 1);
3143 for (i
= s
; i
< use
->n_map_members
; i
++)
3144 if (use
->cost_map
[i
].cand
== cand
)
3145 return use
->cost_map
+ i
;
3146 else if (use
->cost_map
[i
].cand
== NULL
)
3148 for (i
= 0; i
< s
; i
++)
3149 if (use
->cost_map
[i
].cand
== cand
)
3150 return use
->cost_map
+ i
;
3151 else if (use
->cost_map
[i
].cand
== NULL
)
3157 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3159 produce_memory_decl_rtl (tree obj
, int *regno
)
3161 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3162 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3166 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3168 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3169 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3170 SET_SYMBOL_REF_DECL (x
, obj
);
3171 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3172 set_mem_addr_space (x
, as
);
3173 targetm
.encode_section_info (obj
, x
, true);
3177 x
= gen_raw_REG (address_mode
, (*regno
)++);
3178 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3179 set_mem_addr_space (x
, as
);
3185 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3186 walk_tree. DATA contains the actual fake register number. */
3189 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3191 tree obj
= NULL_TREE
;
3193 int *regno
= (int *) data
;
3195 switch (TREE_CODE (*expr_p
))
3198 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3199 handled_component_p (*expr_p
);
3200 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3203 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3204 x
= produce_memory_decl_rtl (obj
, regno
);
3209 obj
= SSA_NAME_VAR (*expr_p
);
3210 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3213 if (!DECL_RTL_SET_P (obj
))
3214 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3223 if (DECL_RTL_SET_P (obj
))
3226 if (DECL_MODE (obj
) == BLKmode
)
3227 x
= produce_memory_decl_rtl (obj
, regno
);
3229 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3239 decl_rtl_to_reset
.safe_push (obj
);
3240 SET_DECL_RTL (obj
, x
);
3246 /* Determines cost of the computation of EXPR. */
3249 computation_cost (tree expr
, bool speed
)
3253 tree type
= TREE_TYPE (expr
);
3255 /* Avoid using hard regs in ways which may be unsupported. */
3256 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3257 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3258 enum node_frequency real_frequency
= node
->frequency
;
3260 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3261 crtl
->maybe_hot_insn_p
= speed
;
3262 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3264 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3267 default_rtl_profile ();
3268 node
->frequency
= real_frequency
;
3270 cost
= seq_cost (seq
, speed
);
3272 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3273 TYPE_ADDR_SPACE (type
), speed
);
3274 else if (!REG_P (rslt
))
3275 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3280 /* Returns variable containing the value of candidate CAND at statement AT. */
3283 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3285 if (stmt_after_increment (loop
, cand
, stmt
))
3286 return cand
->var_after
;
3288 return cand
->var_before
;
3291 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3292 same precision that is at least as wide as the precision of TYPE, stores
3293 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3297 determine_common_wider_type (tree
*a
, tree
*b
)
3299 tree wider_type
= NULL
;
3301 tree atype
= TREE_TYPE (*a
);
3303 if (CONVERT_EXPR_P (*a
))
3305 suba
= TREE_OPERAND (*a
, 0);
3306 wider_type
= TREE_TYPE (suba
);
3307 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3313 if (CONVERT_EXPR_P (*b
))
3315 subb
= TREE_OPERAND (*b
, 0);
3316 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3327 /* Determines the expression by that USE is expressed from induction variable
3328 CAND at statement AT in LOOP. The expression is stored in a decomposed
3329 form into AFF. Returns false if USE cannot be expressed using CAND. */
3332 get_computation_aff (struct loop
*loop
,
3333 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3334 struct aff_tree
*aff
)
3336 tree ubase
= use
->iv
->base
;
3337 tree ustep
= use
->iv
->step
;
3338 tree cbase
= cand
->iv
->base
;
3339 tree cstep
= cand
->iv
->step
, cstep_common
;
3340 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3341 tree common_type
, var
;
3343 aff_tree cbase_aff
, var_aff
;
3346 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3348 /* We do not have a precision to express the values of use. */
3352 var
= var_at_stmt (loop
, cand
, at
);
3353 uutype
= unsigned_type_for (utype
);
3355 /* If the conversion is not noop, perform it. */
3356 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3358 cstep
= fold_convert (uutype
, cstep
);
3359 cbase
= fold_convert (uutype
, cbase
);
3360 var
= fold_convert (uutype
, var
);
3363 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3366 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3367 type, we achieve better folding by computing their difference in this
3368 wider type, and cast the result to UUTYPE. We do not need to worry about
3369 overflows, as all the arithmetics will in the end be performed in UUTYPE
3371 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3373 /* use = ubase - ratio * cbase + ratio * var. */
3374 tree_to_aff_combination (ubase
, common_type
, aff
);
3375 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3376 tree_to_aff_combination (var
, uutype
, &var_aff
);
3378 /* We need to shift the value if we are after the increment. */
3379 if (stmt_after_increment (loop
, cand
, at
))
3383 if (common_type
!= uutype
)
3384 cstep_common
= fold_convert (common_type
, cstep
);
3386 cstep_common
= cstep
;
3388 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3389 aff_combination_add (&cbase_aff
, &cstep_aff
);
3392 aff_combination_scale (&cbase_aff
, -rat
);
3393 aff_combination_add (aff
, &cbase_aff
);
3394 if (common_type
!= uutype
)
3395 aff_combination_convert (aff
, uutype
);
3397 aff_combination_scale (&var_aff
, rat
);
3398 aff_combination_add (aff
, &var_aff
);
3403 /* Return the type of USE. */
3406 get_use_type (struct iv_use
*use
)
3408 tree base_type
= TREE_TYPE (use
->iv
->base
);
3411 if (use
->type
== USE_ADDRESS
)
3413 /* The base_type may be a void pointer. Create a pointer type based on
3414 the mem_ref instead. */
3415 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3416 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3417 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3425 /* Determines the expression by that USE is expressed from induction variable
3426 CAND at statement AT in LOOP. The computation is unshared. */
3429 get_computation_at (struct loop
*loop
,
3430 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3433 tree type
= get_use_type (use
);
3435 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3437 unshare_aff_combination (&aff
);
3438 return fold_convert (type
, aff_combination_to_tree (&aff
));
3441 /* Determines the expression by that USE is expressed from induction variable
3442 CAND in LOOP. The computation is unshared. */
3445 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3447 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3450 /* Adjust the cost COST for being in loop setup rather than loop body.
3451 If we're optimizing for space, the loop setup overhead is constant;
3452 if we're optimizing for speed, amortize it over the per-iteration cost. */
3454 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3458 else if (optimize_loop_for_speed_p (data
->current_loop
))
3459 return cost
/ avg_loop_niter (data
->current_loop
);
3464 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3465 validity for a memory reference accessing memory of mode MODE in
3466 address space AS. */
3470 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3473 #define MAX_RATIO 128
3474 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3475 static vec
<sbitmap
> valid_mult_list
;
3478 if (data_index
>= valid_mult_list
.length ())
3479 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3481 valid_mult
= valid_mult_list
[data_index
];
3484 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3485 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3486 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3490 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3491 bitmap_clear (valid_mult
);
3492 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3493 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3494 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3496 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3497 if (memory_address_addr_space_p (mode
, addr
, as
)
3498 || memory_address_addr_space_p (mode
, scaled
, as
))
3499 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3504 fprintf (dump_file
, " allowed multipliers:");
3505 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3506 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3507 fprintf (dump_file
, " %d", (int) i
);
3508 fprintf (dump_file
, "\n");
3509 fprintf (dump_file
, "\n");
3512 valid_mult_list
[data_index
] = valid_mult
;
3515 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3518 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3521 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3522 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3523 variable is omitted. Compute the cost for a memory reference that accesses
3524 a memory location of mode MEM_MODE in address space AS.
3526 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3527 size of MEM_MODE / RATIO) is available. To make this determination, we
3528 look at the size of the increment to be made, which is given in CSTEP.
3529 CSTEP may be zero if the step is unknown.
3530 STMT_AFTER_INC is true iff the statement we're looking at is after the
3531 increment of the original biv.
3533 TODO -- there must be some better way. This all is quite crude. */
3537 AINC_PRE_INC
, /* Pre increment. */
3538 AINC_PRE_DEC
, /* Pre decrement. */
3539 AINC_POST_INC
, /* Post increment. */
3540 AINC_POST_DEC
, /* Post decrement. */
3541 AINC_NONE
/* Also the number of auto increment types. */
3544 typedef struct address_cost_data_s
3546 HOST_WIDE_INT min_offset
, max_offset
;
3547 unsigned costs
[2][2][2][2];
3548 unsigned ainc_costs
[AINC_NONE
];
3549 } *address_cost_data
;
3553 get_address_cost (bool symbol_present
, bool var_present
,
3554 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3555 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3556 addr_space_t as
, bool speed
,
3557 bool stmt_after_inc
, bool *may_autoinc
)
3559 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3560 static vec
<address_cost_data
> address_cost_data_list
;
3561 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3562 address_cost_data data
;
3563 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3564 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3565 unsigned cost
, acost
, complexity
;
3566 enum ainc_type autoinc_type
;
3567 bool offset_p
, ratio_p
, autoinc
;
3568 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3569 unsigned HOST_WIDE_INT mask
;
3572 if (data_index
>= address_cost_data_list
.length ())
3573 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3575 data
= address_cost_data_list
[data_index
];
3579 HOST_WIDE_INT rat
, off
= 0;
3580 int old_cse_not_expected
, width
;
3581 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3586 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3588 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3590 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3591 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3592 width
= HOST_BITS_PER_WIDE_INT
- 1;
3593 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3595 for (i
= width
; i
>= 0; i
--)
3597 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3598 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3599 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3602 data
->min_offset
= (i
== -1? 0 : off
);
3604 for (i
= width
; i
>= 0; i
--)
3606 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3607 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3608 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3610 /* For some strict-alignment targets, the offset must be naturally
3611 aligned. Try an aligned offset if mem_mode is not QImode. */
3612 off
= mem_mode
!= QImode
3613 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3614 - GET_MODE_SIZE (mem_mode
)
3618 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3619 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3625 data
->max_offset
= off
;
3627 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3629 fprintf (dump_file
, "get_address_cost:\n");
3630 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3631 GET_MODE_NAME (mem_mode
),
3633 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3634 GET_MODE_NAME (mem_mode
),
3639 for (i
= 2; i
<= MAX_RATIO
; i
++)
3640 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3646 /* Compute the cost of various addressing modes. */
3648 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3649 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3651 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3652 || USE_STORE_PRE_DECREMENT (mem_mode
))
3654 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3655 has_predec
[mem_mode
]
3656 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3658 if (has_predec
[mem_mode
])
3659 data
->ainc_costs
[AINC_PRE_DEC
]
3660 = address_cost (addr
, mem_mode
, as
, speed
);
3662 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3663 || USE_STORE_POST_DECREMENT (mem_mode
))
3665 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3666 has_postdec
[mem_mode
]
3667 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3669 if (has_postdec
[mem_mode
])
3670 data
->ainc_costs
[AINC_POST_DEC
]
3671 = address_cost (addr
, mem_mode
, as
, speed
);
3673 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3674 || USE_STORE_PRE_DECREMENT (mem_mode
))
3676 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3677 has_preinc
[mem_mode
]
3678 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3680 if (has_preinc
[mem_mode
])
3681 data
->ainc_costs
[AINC_PRE_INC
]
3682 = address_cost (addr
, mem_mode
, as
, speed
);
3684 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3685 || USE_STORE_POST_INCREMENT (mem_mode
))
3687 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3688 has_postinc
[mem_mode
]
3689 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3691 if (has_postinc
[mem_mode
])
3692 data
->ainc_costs
[AINC_POST_INC
]
3693 = address_cost (addr
, mem_mode
, as
, speed
);
3695 for (i
= 0; i
< 16; i
++)
3698 var_p
= (i
>> 1) & 1;
3699 off_p
= (i
>> 2) & 1;
3700 rat_p
= (i
>> 3) & 1;
3704 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3705 gen_int_mode (rat
, address_mode
));
3708 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3712 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3713 /* ??? We can run into trouble with some backends by presenting
3714 it with symbols which haven't been properly passed through
3715 targetm.encode_section_info. By setting the local bit, we
3716 enhance the probability of things working. */
3717 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3720 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3722 (PLUS
, address_mode
, base
,
3723 gen_int_mode (off
, address_mode
)));
3726 base
= gen_int_mode (off
, address_mode
);
3731 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3734 /* To avoid splitting addressing modes, pretend that no cse will
3736 old_cse_not_expected
= cse_not_expected
;
3737 cse_not_expected
= true;
3738 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3739 cse_not_expected
= old_cse_not_expected
;
3743 acost
= seq_cost (seq
, speed
);
3744 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3748 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3751 /* On some targets, it is quite expensive to load symbol to a register,
3752 which makes addresses that contain symbols look much more expensive.
3753 However, the symbol will have to be loaded in any case before the
3754 loop (and quite likely we have it in register already), so it does not
3755 make much sense to penalize them too heavily. So make some final
3756 tweaks for the SYMBOL_PRESENT modes:
3758 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3759 var is cheaper, use this mode with small penalty.
3760 If VAR_PRESENT is true, try whether the mode with
3761 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3762 if this is the case, use it. */
3763 add_c
= add_cost (speed
, address_mode
);
3764 for (i
= 0; i
< 8; i
++)
3767 off_p
= (i
>> 1) & 1;
3768 rat_p
= (i
>> 2) & 1;
3770 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3774 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3775 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3778 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3780 fprintf (dump_file
, "Address costs:\n");
3782 for (i
= 0; i
< 16; i
++)
3785 var_p
= (i
>> 1) & 1;
3786 off_p
= (i
>> 2) & 1;
3787 rat_p
= (i
>> 3) & 1;
3789 fprintf (dump_file
, " ");
3791 fprintf (dump_file
, "sym + ");
3793 fprintf (dump_file
, "var + ");
3795 fprintf (dump_file
, "cst + ");
3797 fprintf (dump_file
, "rat * ");
3799 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3800 fprintf (dump_file
, "index costs %d\n", acost
);
3802 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3803 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3804 fprintf (dump_file
, " May include autoinc/dec\n");
3805 fprintf (dump_file
, "\n");
3808 address_cost_data_list
[data_index
] = data
;
3811 bits
= GET_MODE_BITSIZE (address_mode
);
3812 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3814 if ((offset
>> (bits
- 1) & 1))
3819 autoinc_type
= AINC_NONE
;
3820 msize
= GET_MODE_SIZE (mem_mode
);
3821 autoinc_offset
= offset
;
3823 autoinc_offset
+= ratio
* cstep
;
3824 if (symbol_present
|| var_present
|| ratio
!= 1)
3828 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3830 autoinc_type
= AINC_POST_INC
;
3831 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3833 autoinc_type
= AINC_POST_DEC
;
3834 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3836 autoinc_type
= AINC_PRE_INC
;
3837 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3839 autoinc_type
= AINC_PRE_DEC
;
3841 if (autoinc_type
!= AINC_NONE
)
3846 offset_p
= (s_offset
!= 0
3847 && data
->min_offset
<= s_offset
3848 && s_offset
<= data
->max_offset
);
3849 ratio_p
= (ratio
!= 1
3850 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3852 if (ratio
!= 1 && !ratio_p
)
3853 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3855 if (s_offset
&& !offset_p
&& !symbol_present
)
3856 cost
+= add_cost (speed
, address_mode
);
3859 *may_autoinc
= autoinc
;
3861 acost
= data
->ainc_costs
[autoinc_type
];
3863 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3864 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3865 return new_cost (cost
+ acost
, complexity
);
3868 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3869 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3870 calculating the operands of EXPR. Returns true if successful, and returns
3871 the cost in COST. */
3874 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3875 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3878 tree op1
= TREE_OPERAND (expr
, 1);
3879 tree cst
= TREE_OPERAND (mult
, 1);
3880 tree multop
= TREE_OPERAND (mult
, 0);
3881 int m
= exact_log2 (int_cst_value (cst
));
3882 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3883 int as_cost
, sa_cost
;
3886 if (!(m
>= 0 && m
< maxm
))
3889 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3891 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3893 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3894 use that in preference to a shift insn followed by an add insn. */
3895 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3896 ? shiftadd_cost (speed
, mode
, m
)
3898 ? shiftsub1_cost (speed
, mode
, m
)
3899 : shiftsub0_cost (speed
, mode
, m
)));
3901 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3902 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3904 STRIP_NOPS (multop
);
3905 if (!is_gimple_val (multop
))
3906 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3912 /* Estimates cost of forcing expression EXPR into a variable. */
3915 force_expr_to_var_cost (tree expr
, bool speed
)
3917 static bool costs_initialized
= false;
3918 static unsigned integer_cost
[2];
3919 static unsigned symbol_cost
[2];
3920 static unsigned address_cost
[2];
3922 comp_cost cost0
, cost1
, cost
;
3925 if (!costs_initialized
)
3927 tree type
= build_pointer_type (integer_type_node
);
3932 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3933 TREE_STATIC (var
) = 1;
3934 x
= produce_memory_decl_rtl (var
, NULL
);
3935 SET_DECL_RTL (var
, x
);
3937 addr
= build1 (ADDR_EXPR
, type
, var
);
3940 for (i
= 0; i
< 2; i
++)
3942 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3945 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3948 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3949 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3951 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3952 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3953 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3954 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3955 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3956 fprintf (dump_file
, "\n");
3960 costs_initialized
= true;
3965 if (SSA_VAR_P (expr
))
3968 if (is_gimple_min_invariant (expr
))
3970 if (TREE_CODE (expr
) == INTEGER_CST
)
3971 return new_cost (integer_cost
[speed
], 0);
3973 if (TREE_CODE (expr
) == ADDR_EXPR
)
3975 tree obj
= TREE_OPERAND (expr
, 0);
3977 if (TREE_CODE (obj
) == VAR_DECL
3978 || TREE_CODE (obj
) == PARM_DECL
3979 || TREE_CODE (obj
) == RESULT_DECL
)
3980 return new_cost (symbol_cost
[speed
], 0);
3983 return new_cost (address_cost
[speed
], 0);
3986 switch (TREE_CODE (expr
))
3988 case POINTER_PLUS_EXPR
:
3992 op0
= TREE_OPERAND (expr
, 0);
3993 op1
= TREE_OPERAND (expr
, 1);
4000 op0
= TREE_OPERAND (expr
, 0);
4006 /* Just an arbitrary value, FIXME. */
4007 return new_cost (target_spill_cost
[speed
], 0);
4010 if (op0
== NULL_TREE
4011 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4014 cost0
= force_expr_to_var_cost (op0
, speed
);
4016 if (op1
== NULL_TREE
4017 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4020 cost1
= force_expr_to_var_cost (op1
, speed
);
4022 mode
= TYPE_MODE (TREE_TYPE (expr
));
4023 switch (TREE_CODE (expr
))
4025 case POINTER_PLUS_EXPR
:
4029 cost
= new_cost (add_cost (speed
, mode
), 0);
4030 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4032 tree mult
= NULL_TREE
;
4034 if (TREE_CODE (op1
) == MULT_EXPR
)
4036 else if (TREE_CODE (op0
) == MULT_EXPR
)
4039 if (mult
!= NULL_TREE
4040 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4041 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4049 tree inner_mode
, outer_mode
;
4050 outer_mode
= TREE_TYPE (expr
);
4051 inner_mode
= TREE_TYPE (op0
);
4052 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4053 TYPE_MODE (inner_mode
), speed
), 0);
4058 if (cst_and_fits_in_hwi (op0
))
4059 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4061 else if (cst_and_fits_in_hwi (op1
))
4062 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4065 return new_cost (target_spill_cost
[speed
], 0);
4072 cost
= add_costs (cost
, cost0
);
4073 cost
= add_costs (cost
, cost1
);
4075 /* Bound the cost by target_spill_cost. The parts of complicated
4076 computations often are either loop invariant or at least can
4077 be shared between several iv uses, so letting this grow without
4078 limits would not give reasonable results. */
4079 if (cost
.cost
> (int) target_spill_cost
[speed
])
4080 cost
.cost
= target_spill_cost
[speed
];
4085 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4086 invariants the computation depends on. */
4089 force_var_cost (struct ivopts_data
*data
,
4090 tree expr
, bitmap
*depends_on
)
4094 fd_ivopts_data
= data
;
4095 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4098 return force_expr_to_var_cost (expr
, data
->speed
);
4101 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4102 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4103 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4104 invariants the computation depends on. */
4107 split_address_cost (struct ivopts_data
*data
,
4108 tree addr
, bool *symbol_present
, bool *var_present
,
4109 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4112 HOST_WIDE_INT bitsize
;
4113 HOST_WIDE_INT bitpos
;
4116 int unsignedp
, volatilep
;
4118 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4119 &unsignedp
, &volatilep
, false);
4122 || bitpos
% BITS_PER_UNIT
!= 0
4123 || TREE_CODE (core
) != VAR_DECL
)
4125 *symbol_present
= false;
4126 *var_present
= true;
4127 fd_ivopts_data
= data
;
4128 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4129 return new_cost (target_spill_cost
[data
->speed
], 0);
4132 *offset
+= bitpos
/ BITS_PER_UNIT
;
4133 if (TREE_STATIC (core
)
4134 || DECL_EXTERNAL (core
))
4136 *symbol_present
= true;
4137 *var_present
= false;
4141 *symbol_present
= false;
4142 *var_present
= true;
4146 /* Estimates cost of expressing difference of addresses E1 - E2 as
4147 var + symbol + offset. The value of offset is added to OFFSET,
4148 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4149 part is missing. DEPENDS_ON is a set of the invariants the computation
4153 ptr_difference_cost (struct ivopts_data
*data
,
4154 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4155 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4157 HOST_WIDE_INT diff
= 0;
4158 aff_tree aff_e1
, aff_e2
;
4161 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4163 if (ptr_difference_const (e1
, e2
, &diff
))
4166 *symbol_present
= false;
4167 *var_present
= false;
4171 if (integer_zerop (e2
))
4172 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4173 symbol_present
, var_present
, offset
, depends_on
);
4175 *symbol_present
= false;
4176 *var_present
= true;
4178 type
= signed_type_for (TREE_TYPE (e1
));
4179 tree_to_aff_combination (e1
, type
, &aff_e1
);
4180 tree_to_aff_combination (e2
, type
, &aff_e2
);
4181 aff_combination_scale (&aff_e2
, -1);
4182 aff_combination_add (&aff_e1
, &aff_e2
);
4184 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4187 /* Estimates cost of expressing difference E1 - E2 as
4188 var + symbol + offset. The value of offset is added to OFFSET,
4189 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4190 part is missing. DEPENDS_ON is a set of the invariants the computation
4194 difference_cost (struct ivopts_data
*data
,
4195 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4196 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4198 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4199 unsigned HOST_WIDE_INT off1
, off2
;
4200 aff_tree aff_e1
, aff_e2
;
4203 e1
= strip_offset (e1
, &off1
);
4204 e2
= strip_offset (e2
, &off2
);
4205 *offset
+= off1
- off2
;
4210 if (TREE_CODE (e1
) == ADDR_EXPR
)
4211 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4212 offset
, depends_on
);
4213 *symbol_present
= false;
4215 if (operand_equal_p (e1
, e2
, 0))
4217 *var_present
= false;
4221 *var_present
= true;
4223 if (integer_zerop (e2
))
4224 return force_var_cost (data
, e1
, depends_on
);
4226 if (integer_zerop (e1
))
4228 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4229 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4233 type
= signed_type_for (TREE_TYPE (e1
));
4234 tree_to_aff_combination (e1
, type
, &aff_e1
);
4235 tree_to_aff_combination (e2
, type
, &aff_e2
);
4236 aff_combination_scale (&aff_e2
, -1);
4237 aff_combination_add (&aff_e1
, &aff_e2
);
4239 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4242 /* Returns true if AFF1 and AFF2 are identical. */
4245 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4249 if (aff1
->n
!= aff2
->n
)
4252 for (i
= 0; i
< aff1
->n
; i
++)
4254 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4257 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4263 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4266 get_expr_id (struct ivopts_data
*data
, tree expr
)
4268 struct iv_inv_expr_ent ent
;
4269 struct iv_inv_expr_ent
**slot
;
4272 ent
.hash
= iterative_hash_expr (expr
, 0);
4273 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4277 *slot
= XNEW (struct iv_inv_expr_ent
);
4278 (*slot
)->expr
= expr
;
4279 (*slot
)->hash
= ent
.hash
;
4280 (*slot
)->id
= data
->inv_expr_id
++;
4284 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4285 requires a new compiler generated temporary. Returns -1 otherwise.
4286 ADDRESS_P is a flag indicating if the expression is for address
4290 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4291 tree cbase
, HOST_WIDE_INT ratio
,
4294 aff_tree ubase_aff
, cbase_aff
;
4302 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4303 && (TREE_CODE (cbase
) == INTEGER_CST
))
4306 /* Strips the constant part. */
4307 if (TREE_CODE (ubase
) == PLUS_EXPR
4308 || TREE_CODE (ubase
) == MINUS_EXPR
4309 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4311 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4312 ubase
= TREE_OPERAND (ubase
, 0);
4315 /* Strips the constant part. */
4316 if (TREE_CODE (cbase
) == PLUS_EXPR
4317 || TREE_CODE (cbase
) == MINUS_EXPR
4318 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4320 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4321 cbase
= TREE_OPERAND (cbase
, 0);
4326 if (((TREE_CODE (ubase
) == SSA_NAME
)
4327 || (TREE_CODE (ubase
) == ADDR_EXPR
4328 && is_gimple_min_invariant (ubase
)))
4329 && (TREE_CODE (cbase
) == INTEGER_CST
))
4332 if (((TREE_CODE (cbase
) == SSA_NAME
)
4333 || (TREE_CODE (cbase
) == ADDR_EXPR
4334 && is_gimple_min_invariant (cbase
)))
4335 && (TREE_CODE (ubase
) == INTEGER_CST
))
4341 if (operand_equal_p (ubase
, cbase
, 0))
4344 if (TREE_CODE (ubase
) == ADDR_EXPR
4345 && TREE_CODE (cbase
) == ADDR_EXPR
)
4349 usym
= TREE_OPERAND (ubase
, 0);
4350 csym
= TREE_OPERAND (cbase
, 0);
4351 if (TREE_CODE (usym
) == ARRAY_REF
)
4353 tree ind
= TREE_OPERAND (usym
, 1);
4354 if (TREE_CODE (ind
) == INTEGER_CST
4355 && tree_fits_shwi_p (ind
)
4356 && tree_to_shwi (ind
) == 0)
4357 usym
= TREE_OPERAND (usym
, 0);
4359 if (TREE_CODE (csym
) == ARRAY_REF
)
4361 tree ind
= TREE_OPERAND (csym
, 1);
4362 if (TREE_CODE (ind
) == INTEGER_CST
4363 && tree_fits_shwi_p (ind
)
4364 && tree_to_shwi (ind
) == 0)
4365 csym
= TREE_OPERAND (csym
, 0);
4367 if (operand_equal_p (usym
, csym
, 0))
4370 /* Now do more complex comparison */
4371 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4372 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4373 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4377 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4378 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4380 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4381 aff_combination_add (&ubase_aff
, &cbase_aff
);
4382 expr
= aff_combination_to_tree (&ubase_aff
);
4383 return get_expr_id (data
, expr
);
4388 /* Determines the cost of the computation by that USE is expressed
4389 from induction variable CAND. If ADDRESS_P is true, we just need
4390 to create an address from it, otherwise we want to get it into
4391 register. A set of invariants we depend on is stored in
4392 DEPENDS_ON. AT is the statement at that the value is computed.
4393 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4394 addressing is likely. */
4397 get_computation_cost_at (struct ivopts_data
*data
,
4398 struct iv_use
*use
, struct iv_cand
*cand
,
4399 bool address_p
, bitmap
*depends_on
, gimple at
,
4403 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4405 tree utype
= TREE_TYPE (ubase
), ctype
;
4406 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4407 HOST_WIDE_INT ratio
, aratio
;
4408 bool var_present
, symbol_present
, stmt_is_after_inc
;
4411 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4412 machine_mode mem_mode
= (address_p
4413 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4418 /* Only consider real candidates. */
4420 return infinite_cost
;
4422 cbase
= cand
->iv
->base
;
4423 cstep
= cand
->iv
->step
;
4424 ctype
= TREE_TYPE (cbase
);
4426 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4428 /* We do not have a precision to express the values of use. */
4429 return infinite_cost
;
4433 || (use
->iv
->base_object
4434 && cand
->iv
->base_object
4435 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4436 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4438 /* Do not try to express address of an object with computation based
4439 on address of a different object. This may cause problems in rtl
4440 level alias analysis (that does not expect this to be happening,
4441 as this is illegal in C), and would be unlikely to be useful
4443 if (use
->iv
->base_object
4444 && cand
->iv
->base_object
4445 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4446 return infinite_cost
;
4449 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4451 /* TODO -- add direct handling of this case. */
4455 /* CSTEPI is removed from the offset in case statement is after the
4456 increment. If the step is not constant, we use zero instead.
4457 This is a bit imprecise (there is the extra addition), but
4458 redundancy elimination is likely to transform the code so that
4459 it uses value of the variable before increment anyway,
4460 so it is not that much unrealistic. */
4461 if (cst_and_fits_in_hwi (cstep
))
4462 cstepi
= int_cst_value (cstep
);
4466 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4467 return infinite_cost
;
4469 if (wi::fits_shwi_p (rat
))
4470 ratio
= rat
.to_shwi ();
4472 return infinite_cost
;
4475 ctype
= TREE_TYPE (cbase
);
4477 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4479 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4480 or ratio == 1, it is better to handle this like
4482 ubase - ratio * cbase + ratio * var
4484 (also holds in the case ratio == -1, TODO. */
4486 if (cst_and_fits_in_hwi (cbase
))
4488 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4489 cost
= difference_cost (data
,
4490 ubase
, build_int_cst (utype
, 0),
4491 &symbol_present
, &var_present
, &offset
,
4493 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4495 else if (ratio
== 1)
4497 tree real_cbase
= cbase
;
4499 /* Check to see if any adjustment is needed. */
4500 if (cstepi
== 0 && stmt_is_after_inc
)
4502 aff_tree real_cbase_aff
;
4505 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4507 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4509 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4510 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4513 cost
= difference_cost (data
,
4515 &symbol_present
, &var_present
, &offset
,
4517 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4520 && !POINTER_TYPE_P (ctype
)
4521 && multiplier_allowed_in_address_p
4523 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4526 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4527 cost
= difference_cost (data
,
4529 &symbol_present
, &var_present
, &offset
,
4531 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4535 cost
= force_var_cost (data
, cbase
, depends_on
);
4536 cost
= add_costs (cost
,
4537 difference_cost (data
,
4538 ubase
, build_int_cst (utype
, 0),
4539 &symbol_present
, &var_present
,
4540 &offset
, depends_on
));
4541 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4542 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4545 /* Set of invariants depended on by sub use has already been computed
4546 for the first use in the group. */
4550 if (depends_on
&& *depends_on
)
4551 bitmap_clear (*depends_on
);
4553 else if (inv_expr_id
)
4556 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4557 /* Clear depends on. */
4558 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4559 bitmap_clear (*depends_on
);
4562 /* If we are after the increment, the value of the candidate is higher by
4564 if (stmt_is_after_inc
)
4565 offset
-= ratio
* cstepi
;
4567 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4568 (symbol/var1/const parts may be omitted). If we are looking for an
4569 address, find the cost of addressing this. */
4571 return add_costs (cost
,
4572 get_address_cost (symbol_present
, var_present
,
4573 offset
, ratio
, cstepi
,
4575 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4576 speed
, stmt_is_after_inc
,
4579 /* Otherwise estimate the costs for computing the expression. */
4580 if (!symbol_present
&& !var_present
&& !offset
)
4583 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4587 /* Symbol + offset should be compile-time computable so consider that they
4588 are added once to the variable, if present. */
4589 if (var_present
&& (symbol_present
|| offset
))
4590 cost
.cost
+= adjust_setup_cost (data
,
4591 add_cost (speed
, TYPE_MODE (ctype
)));
4593 /* Having offset does not affect runtime cost in case it is added to
4594 symbol, but it increases complexity. */
4598 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4600 aratio
= ratio
> 0 ? ratio
: -ratio
;
4602 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4607 *can_autoinc
= false;
4610 /* Just get the expression, expand it and measure the cost. */
4611 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4614 return infinite_cost
;
4617 comp
= build_simple_mem_ref (comp
);
4619 return new_cost (computation_cost (comp
, speed
), 0);
4623 /* Determines the cost of the computation by that USE is expressed
4624 from induction variable CAND. If ADDRESS_P is true, we just need
4625 to create an address from it, otherwise we want to get it into
4626 register. A set of invariants we depend on is stored in
4627 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4628 autoinc addressing is likely. */
4631 get_computation_cost (struct ivopts_data
*data
,
4632 struct iv_use
*use
, struct iv_cand
*cand
,
4633 bool address_p
, bitmap
*depends_on
,
4634 bool *can_autoinc
, int *inv_expr_id
)
4636 return get_computation_cost_at (data
,
4637 use
, cand
, address_p
, depends_on
, use
->stmt
,
4638 can_autoinc
, inv_expr_id
);
4641 /* Determines cost of basing replacement of USE on CAND in a generic
4645 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4646 struct iv_use
*use
, struct iv_cand
*cand
)
4650 int inv_expr_id
= -1;
4652 /* The simple case first -- if we need to express value of the preserved
4653 original biv, the cost is 0. This also prevents us from counting the
4654 cost of increment twice -- once at this use and once in the cost of
4656 if (cand
->pos
== IP_ORIGINAL
4657 && cand
->incremented_at
== use
->stmt
)
4659 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4664 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4665 NULL
, &inv_expr_id
);
4667 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4670 return !infinite_cost_p (cost
);
4673 /* Determines cost of basing replacement of USE on CAND in an address. */
4676 determine_use_iv_cost_address (struct ivopts_data
*data
,
4677 struct iv_use
*use
, struct iv_cand
*cand
)
4681 int inv_expr_id
= -1;
4682 struct iv_use
*sub_use
;
4684 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4685 &can_autoinc
, &inv_expr_id
);
4687 if (cand
->ainc_use
== use
)
4690 cost
.cost
-= cand
->cost_step
;
4691 /* If we generated the candidate solely for exploiting autoincrement
4692 opportunities, and it turns out it can't be used, set the cost to
4693 infinity to make sure we ignore it. */
4694 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4695 cost
= infinite_cost
;
4697 for (sub_use
= use
->next
;
4698 sub_use
&& !infinite_cost_p (cost
);
4699 sub_use
= sub_use
->next
)
4701 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4702 &can_autoinc
, &inv_expr_id
);
4703 cost
= add_costs (cost
, sub_cost
);
4706 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4709 return !infinite_cost_p (cost
);
4712 /* Computes value of candidate CAND at position AT in iteration NITER, and
4713 stores it to VAL. */
4716 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4719 aff_tree step
, delta
, nit
;
4720 struct iv
*iv
= cand
->iv
;
4721 tree type
= TREE_TYPE (iv
->base
);
4722 tree steptype
= type
;
4723 if (POINTER_TYPE_P (type
))
4724 steptype
= sizetype
;
4725 steptype
= unsigned_type_for (type
);
4727 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4728 aff_combination_convert (&step
, steptype
);
4729 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4730 aff_combination_convert (&nit
, steptype
);
4731 aff_combination_mult (&nit
, &step
, &delta
);
4732 if (stmt_after_increment (loop
, cand
, at
))
4733 aff_combination_add (&delta
, &step
);
4735 tree_to_aff_combination (iv
->base
, type
, val
);
4736 if (!POINTER_TYPE_P (type
))
4737 aff_combination_convert (val
, steptype
);
4738 aff_combination_add (val
, &delta
);
4741 /* Returns period of induction variable iv. */
4744 iv_period (struct iv
*iv
)
4746 tree step
= iv
->step
, period
, type
;
4749 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4751 type
= unsigned_type_for (TREE_TYPE (step
));
4752 /* Period of the iv is lcm (step, type_range)/step -1,
4753 i.e., N*type_range/step - 1. Since type range is power
4754 of two, N == (step >> num_of_ending_zeros_binary (step),
4755 so the final result is
4757 (type_range >> num_of_ending_zeros_binary (step)) - 1
4760 pow2div
= num_ending_zeros (step
);
4762 period
= build_low_bits_mask (type
,
4763 (TYPE_PRECISION (type
)
4764 - tree_to_uhwi (pow2div
)));
4769 /* Returns the comparison operator used when eliminating the iv USE. */
4771 static enum tree_code
4772 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4774 struct loop
*loop
= data
->current_loop
;
4778 ex_bb
= gimple_bb (use
->stmt
);
4779 exit
= EDGE_SUCC (ex_bb
, 0);
4780 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4781 exit
= EDGE_SUCC (ex_bb
, 1);
4783 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4786 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4787 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4788 calculation is performed in non-wrapping type.
4790 TODO: More generally, we could test for the situation that
4791 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4792 This would require knowing the sign of OFFSET. */
4795 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4797 enum tree_code code
;
4799 aff_tree aff_e1
, aff_e2
, aff_offset
;
4801 if (!nowrap_type_p (TREE_TYPE (base
)))
4804 base
= expand_simple_operations (base
);
4806 if (TREE_CODE (base
) == SSA_NAME
)
4808 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4810 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4813 code
= gimple_assign_rhs_code (stmt
);
4814 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4817 e1
= gimple_assign_rhs1 (stmt
);
4818 e2
= gimple_assign_rhs2 (stmt
);
4822 code
= TREE_CODE (base
);
4823 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4825 e1
= TREE_OPERAND (base
, 0);
4826 e2
= TREE_OPERAND (base
, 1);
4829 /* Use affine expansion as deeper inspection to prove the equality. */
4830 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4831 &aff_e2
, &data
->name_expansion_cache
);
4832 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4833 &aff_offset
, &data
->name_expansion_cache
);
4834 aff_combination_scale (&aff_offset
, -1);
4838 aff_combination_add (&aff_e2
, &aff_offset
);
4839 if (aff_combination_zero_p (&aff_e2
))
4842 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4843 &aff_e1
, &data
->name_expansion_cache
);
4844 aff_combination_add (&aff_e1
, &aff_offset
);
4845 return aff_combination_zero_p (&aff_e1
);
4847 case POINTER_PLUS_EXPR
:
4848 aff_combination_add (&aff_e2
, &aff_offset
);
4849 return aff_combination_zero_p (&aff_e2
);
4856 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4857 comparison with CAND. NITER describes the number of iterations of
4858 the loops. If successful, the comparison in COMP_P is altered accordingly.
4860 We aim to handle the following situation:
4876 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4877 We aim to optimize this to
4885 while (p < p_0 - a + b);
4887 This preserves the correctness, since the pointer arithmetics does not
4888 overflow. More precisely:
4890 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4891 overflow in computing it or the values of p.
4892 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4893 overflow. To prove this, we use the fact that p_0 = base + a. */
4896 iv_elimination_compare_lt (struct ivopts_data
*data
,
4897 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4898 struct tree_niter_desc
*niter
)
4900 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4901 struct aff_tree nit
, tmpa
, tmpb
;
4902 enum tree_code comp
;
4905 /* We need to know that the candidate induction variable does not overflow.
4906 While more complex analysis may be used to prove this, for now just
4907 check that the variable appears in the original program and that it
4908 is computed in a type that guarantees no overflows. */
4909 cand_type
= TREE_TYPE (cand
->iv
->base
);
4910 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4913 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4914 the calculation of the BOUND could overflow, making the comparison
4916 if (!data
->loop_single_exit_p
)
4919 /* We need to be able to decide whether candidate is increasing or decreasing
4920 in order to choose the right comparison operator. */
4921 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4923 step
= int_cst_value (cand
->iv
->step
);
4925 /* Check that the number of iterations matches the expected pattern:
4926 a + 1 > b ? 0 : b - a - 1. */
4927 mbz
= niter
->may_be_zero
;
4928 if (TREE_CODE (mbz
) == GT_EXPR
)
4930 /* Handle a + 1 > b. */
4931 tree op0
= TREE_OPERAND (mbz
, 0);
4932 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4934 a
= TREE_OPERAND (op0
, 0);
4935 b
= TREE_OPERAND (mbz
, 1);
4940 else if (TREE_CODE (mbz
) == LT_EXPR
)
4942 tree op1
= TREE_OPERAND (mbz
, 1);
4944 /* Handle b < a + 1. */
4945 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4947 a
= TREE_OPERAND (op1
, 0);
4948 b
= TREE_OPERAND (mbz
, 0);
4956 /* Expected number of iterations is B - A - 1. Check that it matches
4957 the actual number, i.e., that B - A - NITER = 1. */
4958 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4959 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4960 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4961 aff_combination_scale (&nit
, -1);
4962 aff_combination_scale (&tmpa
, -1);
4963 aff_combination_add (&tmpb
, &tmpa
);
4964 aff_combination_add (&tmpb
, &nit
);
4965 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4968 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4970 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4972 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4973 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4976 /* Determine the new comparison operator. */
4977 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4978 if (*comp_p
== NE_EXPR
)
4980 else if (*comp_p
== EQ_EXPR
)
4981 *comp_p
= invert_tree_comparison (comp
, false);
4988 /* Check whether it is possible to express the condition in USE by comparison
4989 of candidate CAND. If so, store the value compared with to BOUND, and the
4990 comparison operator to COMP. */
4993 may_eliminate_iv (struct ivopts_data
*data
,
4994 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4995 enum tree_code
*comp
)
5000 struct loop
*loop
= data
->current_loop
;
5002 struct tree_niter_desc
*desc
= NULL
;
5004 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5007 /* For now works only for exits that dominate the loop latch.
5008 TODO: extend to other conditions inside loop body. */
5009 ex_bb
= gimple_bb (use
->stmt
);
5010 if (use
->stmt
!= last_stmt (ex_bb
)
5011 || gimple_code (use
->stmt
) != GIMPLE_COND
5012 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5015 exit
= EDGE_SUCC (ex_bb
, 0);
5016 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5017 exit
= EDGE_SUCC (ex_bb
, 1);
5018 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5021 desc
= niter_for_exit (data
, exit
);
5025 /* Determine whether we can use the variable to test the exit condition.
5026 This is the case iff the period of the induction variable is greater
5027 than the number of iterations for which the exit condition is true. */
5028 period
= iv_period (cand
->iv
);
5030 /* If the number of iterations is constant, compare against it directly. */
5031 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5033 /* See cand_value_at. */
5034 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5036 if (!tree_int_cst_lt (desc
->niter
, period
))
5041 if (tree_int_cst_lt (period
, desc
->niter
))
5046 /* If not, and if this is the only possible exit of the loop, see whether
5047 we can get a conservative estimate on the number of iterations of the
5048 entire loop and compare against that instead. */
5051 widest_int period_value
, max_niter
;
5053 max_niter
= desc
->max
;
5054 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5056 period_value
= wi::to_widest (period
);
5057 if (wi::gtu_p (max_niter
, period_value
))
5059 /* See if we can take advantage of inferred loop bound information. */
5060 if (data
->loop_single_exit_p
)
5062 if (!max_loop_iterations (loop
, &max_niter
))
5064 /* The loop bound is already adjusted by adding 1. */
5065 if (wi::gtu_p (max_niter
, period_value
))
5073 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5075 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5076 aff_combination_to_tree (&bnd
));
5077 *comp
= iv_elimination_compare (data
, use
);
5079 /* It is unlikely that computing the number of iterations using division
5080 would be more profitable than keeping the original induction variable. */
5081 if (expression_expensive_p (*bound
))
5084 /* Sometimes, it is possible to handle the situation that the number of
5085 iterations may be zero unless additional assumtions by using <
5086 instead of != in the exit condition.
5088 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5089 base the exit condition on it. However, that is often too
5091 if (!integer_zerop (desc
->may_be_zero
))
5092 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5097 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5098 be copied, if is is used in the loop body and DATA->body_includes_call. */
5101 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5103 tree sbound
= bound
;
5104 STRIP_NOPS (sbound
);
5106 if (TREE_CODE (sbound
) == SSA_NAME
5107 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5108 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5109 && data
->body_includes_call
)
5110 return COSTS_N_INSNS (1);
5115 /* Determines cost of basing replacement of USE on CAND in a condition. */
5118 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5119 struct iv_use
*use
, struct iv_cand
*cand
)
5121 tree bound
= NULL_TREE
;
5123 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5124 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5126 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5127 tree
*control_var
, *bound_cst
;
5128 enum tree_code comp
= ERROR_MARK
;
5130 /* Only consider real candidates. */
5133 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5138 /* Try iv elimination. */
5139 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5141 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5142 if (elim_cost
.cost
== 0)
5143 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5144 else if (TREE_CODE (bound
) == INTEGER_CST
)
5146 /* If we replace a loop condition 'i < n' with 'p < base + n',
5147 depends_on_elim will have 'base' and 'n' set, which implies
5148 that both 'base' and 'n' will be live during the loop. More likely,
5149 'base + n' will be loop invariant, resulting in only one live value
5150 during the loop. So in that case we clear depends_on_elim and set
5151 elim_inv_expr_id instead. */
5152 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5154 elim_inv_expr_id
= get_expr_id (data
, bound
);
5155 bitmap_clear (depends_on_elim
);
5157 /* The bound is a loop invariant, so it will be only computed
5159 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5162 elim_cost
= infinite_cost
;
5164 /* Try expressing the original giv. If it is compared with an invariant,
5165 note that we cannot get rid of it. */
5166 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5170 /* When the condition is a comparison of the candidate IV against
5171 zero, prefer this IV.
5173 TODO: The constant that we're subtracting from the cost should
5174 be target-dependent. This information should be added to the
5175 target costs for each backend. */
5176 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5177 && integer_zerop (*bound_cst
)
5178 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5179 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5180 elim_cost
.cost
-= 1;
5182 express_cost
= get_computation_cost (data
, use
, cand
, false,
5183 &depends_on_express
, NULL
,
5184 &express_inv_expr_id
);
5185 fd_ivopts_data
= data
;
5186 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5188 /* Count the cost of the original bound as well. */
5189 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5190 if (bound_cost
.cost
== 0)
5191 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5192 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5193 bound_cost
.cost
= 0;
5194 express_cost
.cost
+= bound_cost
.cost
;
5196 /* Choose the better approach, preferring the eliminated IV. */
5197 if (compare_costs (elim_cost
, express_cost
) <= 0)
5200 depends_on
= depends_on_elim
;
5201 depends_on_elim
= NULL
;
5202 inv_expr_id
= elim_inv_expr_id
;
5206 cost
= express_cost
;
5207 depends_on
= depends_on_express
;
5208 depends_on_express
= NULL
;
5211 inv_expr_id
= express_inv_expr_id
;
5214 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5216 if (depends_on_elim
)
5217 BITMAP_FREE (depends_on_elim
);
5218 if (depends_on_express
)
5219 BITMAP_FREE (depends_on_express
);
5221 return !infinite_cost_p (cost
);
5224 /* Determines cost of basing replacement of USE on CAND. Returns false
5225 if USE cannot be based on CAND. */
5228 determine_use_iv_cost (struct ivopts_data
*data
,
5229 struct iv_use
*use
, struct iv_cand
*cand
)
5233 case USE_NONLINEAR_EXPR
:
5234 return determine_use_iv_cost_generic (data
, use
, cand
);
5237 return determine_use_iv_cost_address (data
, use
, cand
);
5240 return determine_use_iv_cost_condition (data
, use
, cand
);
5247 /* Return true if get_computation_cost indicates that autoincrement is
5248 a possibility for the pair of USE and CAND, false otherwise. */
5251 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5252 struct iv_cand
*cand
)
5258 if (use
->type
!= USE_ADDRESS
)
5261 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5262 &can_autoinc
, NULL
);
5264 BITMAP_FREE (depends_on
);
5266 return !infinite_cost_p (cost
) && can_autoinc
;
5269 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5270 use that allows autoincrement, and set their AINC_USE if possible. */
5273 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5277 for (i
= 0; i
< n_iv_cands (data
); i
++)
5279 struct iv_cand
*cand
= iv_cand (data
, i
);
5280 struct iv_use
*closest_before
= NULL
;
5281 struct iv_use
*closest_after
= NULL
;
5282 if (cand
->pos
!= IP_ORIGINAL
)
5285 for (j
= 0; j
< n_iv_uses (data
); j
++)
5287 struct iv_use
*use
= iv_use (data
, j
);
5288 unsigned uid
= gimple_uid (use
->stmt
);
5290 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5293 if (uid
< gimple_uid (cand
->incremented_at
)
5294 && (closest_before
== NULL
5295 || uid
> gimple_uid (closest_before
->stmt
)))
5296 closest_before
= use
;
5298 if (uid
> gimple_uid (cand
->incremented_at
)
5299 && (closest_after
== NULL
5300 || uid
< gimple_uid (closest_after
->stmt
)))
5301 closest_after
= use
;
5304 if (closest_before
!= NULL
5305 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5306 cand
->ainc_use
= closest_before
;
5307 else if (closest_after
!= NULL
5308 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5309 cand
->ainc_use
= closest_after
;
5313 /* Finds the candidates for the induction variables. */
5316 find_iv_candidates (struct ivopts_data
*data
)
5318 /* Add commonly used ivs. */
5319 add_standard_iv_candidates (data
);
5321 /* Add old induction variables. */
5322 add_old_ivs_candidates (data
);
5324 /* Add induction variables derived from uses. */
5325 add_derived_ivs_candidates (data
);
5327 set_autoinc_for_original_candidates (data
);
5329 /* Record the important candidates. */
5330 record_important_candidates (data
);
5333 /* Determines costs of basing the use of the iv on an iv candidate. */
5336 determine_use_iv_costs (struct ivopts_data
*data
)
5340 struct iv_cand
*cand
;
5341 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5343 alloc_use_cost_map (data
);
5345 for (i
= 0; i
< n_iv_uses (data
); i
++)
5347 use
= iv_use (data
, i
);
5349 if (data
->consider_all_candidates
)
5351 for (j
= 0; j
< n_iv_cands (data
); j
++)
5353 cand
= iv_cand (data
, j
);
5354 determine_use_iv_cost (data
, use
, cand
);
5361 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5363 cand
= iv_cand (data
, j
);
5364 if (!determine_use_iv_cost (data
, use
, cand
))
5365 bitmap_set_bit (to_clear
, j
);
5368 /* Remove the candidates for that the cost is infinite from
5369 the list of related candidates. */
5370 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5371 bitmap_clear (to_clear
);
5375 BITMAP_FREE (to_clear
);
5377 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5379 fprintf (dump_file
, "Use-candidate costs:\n");
5381 for (i
= 0; i
< n_iv_uses (data
); i
++)
5383 use
= iv_use (data
, i
);
5385 fprintf (dump_file
, "Use %d:\n", i
);
5386 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5387 for (j
= 0; j
< use
->n_map_members
; j
++)
5389 if (!use
->cost_map
[j
].cand
5390 || infinite_cost_p (use
->cost_map
[j
].cost
))
5393 fprintf (dump_file
, " %d\t%d\t%d\t",
5394 use
->cost_map
[j
].cand
->id
,
5395 use
->cost_map
[j
].cost
.cost
,
5396 use
->cost_map
[j
].cost
.complexity
);
5397 if (use
->cost_map
[j
].depends_on
)
5398 bitmap_print (dump_file
,
5399 use
->cost_map
[j
].depends_on
, "","");
5400 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5401 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5402 fprintf (dump_file
, "\n");
5405 fprintf (dump_file
, "\n");
5407 fprintf (dump_file
, "\n");
5411 /* Determines cost of the candidate CAND. */
5414 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5416 comp_cost cost_base
;
5417 unsigned cost
, cost_step
;
5426 /* There are two costs associated with the candidate -- its increment
5427 and its initialization. The second is almost negligible for any loop
5428 that rolls enough, so we take it just very little into account. */
5430 base
= cand
->iv
->base
;
5431 cost_base
= force_var_cost (data
, base
, NULL
);
5432 /* It will be exceptional that the iv register happens to be initialized with
5433 the proper value at no cost. In general, there will at least be a regcopy
5435 if (cost_base
.cost
== 0)
5436 cost_base
.cost
= COSTS_N_INSNS (1);
5437 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5439 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5441 /* Prefer the original ivs unless we may gain something by replacing it.
5442 The reason is to make debugging simpler; so this is not relevant for
5443 artificial ivs created by other optimization passes. */
5444 if (cand
->pos
!= IP_ORIGINAL
5445 || !SSA_NAME_VAR (cand
->var_before
)
5446 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5449 /* Prefer not to insert statements into latch unless there are some
5450 already (so that we do not create unnecessary jumps). */
5451 if (cand
->pos
== IP_END
5452 && empty_block_p (ip_end_pos (data
->current_loop
)))
5456 cand
->cost_step
= cost_step
;
5459 /* Determines costs of computation of the candidates. */
5462 determine_iv_costs (struct ivopts_data
*data
)
5466 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5468 fprintf (dump_file
, "Candidate costs:\n");
5469 fprintf (dump_file
, " cand\tcost\n");
5472 for (i
= 0; i
< n_iv_cands (data
); i
++)
5474 struct iv_cand
*cand
= iv_cand (data
, i
);
5476 determine_iv_cost (data
, cand
);
5478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5479 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5482 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5483 fprintf (dump_file
, "\n");
5486 /* Calculates cost for having SIZE induction variables. */
5489 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5491 /* We add size to the cost, so that we prefer eliminating ivs
5493 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5494 data
->body_includes_call
);
5497 /* For each size of the induction variable set determine the penalty. */
5500 determine_set_costs (struct ivopts_data
*data
)
5506 struct loop
*loop
= data
->current_loop
;
5509 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5511 fprintf (dump_file
, "Global costs:\n");
5512 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5513 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5514 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5515 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5519 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5522 op
= PHI_RESULT (phi
);
5524 if (virtual_operand_p (op
))
5527 if (get_iv (data
, op
))
5533 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5535 struct version_info
*info
= ver_info (data
, j
);
5537 if (info
->inv_id
&& info
->has_nonlin_use
)
5541 data
->regs_used
= n
;
5542 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5543 fprintf (dump_file
, " regs_used %d\n", n
);
5545 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5547 fprintf (dump_file
, " cost for size:\n");
5548 fprintf (dump_file
, " ivs\tcost\n");
5549 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5550 fprintf (dump_file
, " %d\t%d\n", j
,
5551 ivopts_global_cost_for_size (data
, j
));
5552 fprintf (dump_file
, "\n");
5556 /* Returns true if A is a cheaper cost pair than B. */
5559 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5569 cmp
= compare_costs (a
->cost
, b
->cost
);
5576 /* In case the costs are the same, prefer the cheaper candidate. */
5577 if (a
->cand
->cost
< b
->cand
->cost
)
5584 /* Returns candidate by that USE is expressed in IVS. */
5586 static struct cost_pair
*
5587 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5589 return ivs
->cand_for_use
[use
->id
];
5592 /* Computes the cost field of IVS structure. */
5595 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5597 comp_cost cost
= ivs
->cand_use_cost
;
5599 cost
.cost
+= ivs
->cand_cost
;
5601 cost
.cost
+= ivopts_global_cost_for_size (data
,
5602 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5607 /* Remove invariants in set INVS to set IVS. */
5610 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5618 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5620 ivs
->n_invariant_uses
[iid
]--;
5621 if (ivs
->n_invariant_uses
[iid
] == 0)
5626 /* Set USE not to be expressed by any candidate in IVS. */
5629 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5632 unsigned uid
= use
->id
, cid
;
5633 struct cost_pair
*cp
;
5635 cp
= ivs
->cand_for_use
[uid
];
5641 ivs
->cand_for_use
[uid
] = NULL
;
5642 ivs
->n_cand_uses
[cid
]--;
5644 if (ivs
->n_cand_uses
[cid
] == 0)
5646 bitmap_clear_bit (ivs
->cands
, cid
);
5647 /* Do not count the pseudocandidates. */
5651 ivs
->cand_cost
-= cp
->cand
->cost
;
5653 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5656 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5658 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5660 if (cp
->inv_expr_id
!= -1)
5662 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5663 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5664 ivs
->num_used_inv_expr
--;
5666 iv_ca_recount_cost (data
, ivs
);
5669 /* Add invariants in set INVS to set IVS. */
5672 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5680 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5682 ivs
->n_invariant_uses
[iid
]++;
5683 if (ivs
->n_invariant_uses
[iid
] == 1)
5688 /* Set cost pair for USE in set IVS to CP. */
5691 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5692 struct iv_use
*use
, struct cost_pair
*cp
)
5694 unsigned uid
= use
->id
, cid
;
5696 if (ivs
->cand_for_use
[uid
] == cp
)
5699 if (ivs
->cand_for_use
[uid
])
5700 iv_ca_set_no_cp (data
, ivs
, use
);
5707 ivs
->cand_for_use
[uid
] = cp
;
5708 ivs
->n_cand_uses
[cid
]++;
5709 if (ivs
->n_cand_uses
[cid
] == 1)
5711 bitmap_set_bit (ivs
->cands
, cid
);
5712 /* Do not count the pseudocandidates. */
5716 ivs
->cand_cost
+= cp
->cand
->cost
;
5718 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5721 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5722 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5724 if (cp
->inv_expr_id
!= -1)
5726 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5727 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5728 ivs
->num_used_inv_expr
++;
5730 iv_ca_recount_cost (data
, ivs
);
5734 /* Extend set IVS by expressing USE by some of the candidates in it
5735 if possible. Consider all important candidates if candidates in
5736 set IVS don't give any result. */
5739 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5742 struct cost_pair
*best_cp
= NULL
, *cp
;
5745 struct iv_cand
*cand
;
5747 gcc_assert (ivs
->upto
>= use
->id
);
5751 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5753 cand
= iv_cand (data
, i
);
5754 cp
= get_use_iv_cost (data
, use
, cand
);
5755 if (cheaper_cost_pair (cp
, best_cp
))
5759 if (best_cp
== NULL
)
5761 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5763 cand
= iv_cand (data
, i
);
5764 cp
= get_use_iv_cost (data
, use
, cand
);
5765 if (cheaper_cost_pair (cp
, best_cp
))
5770 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5773 /* Get cost for assignment IVS. */
5776 iv_ca_cost (struct iv_ca
*ivs
)
5778 /* This was a conditional expression but it triggered a bug in
5781 return infinite_cost
;
5786 /* Returns true if all dependences of CP are among invariants in IVS. */
5789 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5794 if (!cp
->depends_on
)
5797 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5799 if (ivs
->n_invariant_uses
[i
] == 0)
5806 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5807 it before NEXT_CHANGE. */
5809 static struct iv_ca_delta
*
5810 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5811 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5813 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5816 change
->old_cp
= old_cp
;
5817 change
->new_cp
= new_cp
;
5818 change
->next_change
= next_change
;
5823 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5826 static struct iv_ca_delta
*
5827 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5829 struct iv_ca_delta
*last
;
5837 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5839 last
->next_change
= l2
;
5844 /* Reverse the list of changes DELTA, forming the inverse to it. */
5846 static struct iv_ca_delta
*
5847 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5849 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5851 for (act
= delta
; act
; act
= next
)
5853 next
= act
->next_change
;
5854 act
->next_change
= prev
;
5857 std::swap (act
->old_cp
, act
->new_cp
);
5863 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5864 reverted instead. */
5867 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5868 struct iv_ca_delta
*delta
, bool forward
)
5870 struct cost_pair
*from
, *to
;
5871 struct iv_ca_delta
*act
;
5874 delta
= iv_ca_delta_reverse (delta
);
5876 for (act
= delta
; act
; act
= act
->next_change
)
5880 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5881 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5885 iv_ca_delta_reverse (delta
);
5888 /* Returns true if CAND is used in IVS. */
5891 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5893 return ivs
->n_cand_uses
[cand
->id
] > 0;
5896 /* Returns number of induction variable candidates in the set IVS. */
5899 iv_ca_n_cands (struct iv_ca
*ivs
)
5901 return ivs
->n_cands
;
5904 /* Free the list of changes DELTA. */
5907 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5909 struct iv_ca_delta
*act
, *next
;
5911 for (act
= *delta
; act
; act
= next
)
5913 next
= act
->next_change
;
5920 /* Allocates new iv candidates assignment. */
5922 static struct iv_ca
*
5923 iv_ca_new (struct ivopts_data
*data
)
5925 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5929 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5930 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5931 nw
->cands
= BITMAP_ALLOC (NULL
);
5934 nw
->cand_use_cost
= no_cost
;
5936 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5938 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5939 nw
->num_used_inv_expr
= 0;
5944 /* Free memory occupied by the set IVS. */
5947 iv_ca_free (struct iv_ca
**ivs
)
5949 free ((*ivs
)->cand_for_use
);
5950 free ((*ivs
)->n_cand_uses
);
5951 BITMAP_FREE ((*ivs
)->cands
);
5952 free ((*ivs
)->n_invariant_uses
);
5953 free ((*ivs
)->used_inv_expr
);
5958 /* Dumps IVS to FILE. */
5961 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5963 const char *pref
= " invariants ";
5965 comp_cost cost
= iv_ca_cost (ivs
);
5967 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5968 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5969 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5970 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5972 for (i
= 0; i
< ivs
->upto
; i
++)
5974 struct iv_use
*use
= iv_use (data
, i
);
5975 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5977 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5978 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5980 fprintf (file
, " use:%d --> ??\n", use
->id
);
5983 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5984 if (ivs
->n_invariant_uses
[i
])
5986 fprintf (file
, "%s%d", pref
, i
);
5989 fprintf (file
, "\n\n");
5992 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5993 new set, and store differences in DELTA. Number of induction variables
5994 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5995 the function will try to find a solution with mimimal iv candidates. */
5998 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5999 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6000 unsigned *n_ivs
, bool min_ncand
)
6005 struct cost_pair
*old_cp
, *new_cp
;
6008 for (i
= 0; i
< ivs
->upto
; i
++)
6010 use
= iv_use (data
, i
);
6011 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6014 && old_cp
->cand
== cand
)
6017 new_cp
= get_use_iv_cost (data
, use
, cand
);
6021 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6024 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6027 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6030 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6031 cost
= iv_ca_cost (ivs
);
6033 *n_ivs
= iv_ca_n_cands (ivs
);
6034 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6039 /* Try narrowing set IVS by removing CAND. Return the cost of
6040 the new set and store the differences in DELTA. START is
6041 the candidate with which we start narrowing. */
6044 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6045 struct iv_cand
*cand
, struct iv_cand
*start
,
6046 struct iv_ca_delta
**delta
)
6050 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6052 struct iv_cand
*cnd
;
6053 comp_cost cost
, best_cost
, acost
;
6056 for (i
= 0; i
< n_iv_uses (data
); i
++)
6058 use
= iv_use (data
, i
);
6060 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6061 if (old_cp
->cand
!= cand
)
6064 best_cost
= iv_ca_cost (ivs
);
6065 /* Start narrowing with START. */
6066 new_cp
= get_use_iv_cost (data
, use
, start
);
6068 if (data
->consider_all_candidates
)
6070 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6072 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6075 cnd
= iv_cand (data
, ci
);
6077 cp
= get_use_iv_cost (data
, use
, cnd
);
6081 iv_ca_set_cp (data
, ivs
, use
, cp
);
6082 acost
= iv_ca_cost (ivs
);
6084 if (compare_costs (acost
, best_cost
) < 0)
6093 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6095 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6098 cnd
= iv_cand (data
, ci
);
6100 cp
= get_use_iv_cost (data
, use
, cnd
);
6104 iv_ca_set_cp (data
, ivs
, use
, cp
);
6105 acost
= iv_ca_cost (ivs
);
6107 if (compare_costs (acost
, best_cost
) < 0)
6114 /* Restore to old cp for use. */
6115 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6119 iv_ca_delta_free (delta
);
6120 return infinite_cost
;
6123 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6126 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6127 cost
= iv_ca_cost (ivs
);
6128 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6133 /* Try optimizing the set of candidates IVS by removing candidates different
6134 from to EXCEPT_CAND from it. Return cost of the new set, and store
6135 differences in DELTA. */
6138 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6139 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6142 struct iv_ca_delta
*act_delta
, *best_delta
;
6144 comp_cost best_cost
, acost
;
6145 struct iv_cand
*cand
;
6148 best_cost
= iv_ca_cost (ivs
);
6150 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6152 cand
= iv_cand (data
, i
);
6154 if (cand
== except_cand
)
6157 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6159 if (compare_costs (acost
, best_cost
) < 0)
6162 iv_ca_delta_free (&best_delta
);
6163 best_delta
= act_delta
;
6166 iv_ca_delta_free (&act_delta
);
6175 /* Recurse to possibly remove other unnecessary ivs. */
6176 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6177 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6178 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6179 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6183 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6184 cheaper local cost for USE than BEST_CP. Return pointer to
6185 the corresponding cost_pair, otherwise just return BEST_CP. */
6187 static struct cost_pair
*
6188 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6189 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6190 struct cost_pair
*best_cp
)
6192 struct iv_cand
*cand
;
6193 struct cost_pair
*cp
;
6195 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6196 if (cand_idx
== old_cand
->id
)
6199 cand
= iv_cand (data
, cand_idx
);
6200 cp
= get_use_iv_cost (data
, use
, cand
);
6201 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6207 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6208 which are used by more than one iv uses. For each of those candidates,
6209 this function tries to represent iv uses under that candidate using
6210 other ones with lower local cost, then tries to prune the new set.
6211 If the new set has lower cost, It returns the new cost after recording
6212 candidate replacement in list DELTA. */
6215 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6216 struct iv_ca_delta
**delta
)
6218 bitmap_iterator bi
, bj
;
6219 unsigned int i
, j
, k
;
6221 struct iv_cand
*cand
;
6222 comp_cost orig_cost
, acost
;
6223 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6224 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6227 orig_cost
= iv_ca_cost (ivs
);
6229 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6231 if (ivs
->n_cand_uses
[i
] == 1
6232 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6235 cand
= iv_cand (data
, i
);
6238 /* Represent uses under current candidate using other ones with
6239 lower local cost. */
6240 for (j
= 0; j
< ivs
->upto
; j
++)
6242 use
= iv_use (data
, j
);
6243 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6245 if (old_cp
->cand
!= cand
)
6249 if (data
->consider_all_candidates
)
6250 for (k
= 0; k
< n_iv_cands (data
); k
++)
6251 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6252 old_cp
->cand
, best_cp
);
6254 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6255 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6256 old_cp
->cand
, best_cp
);
6258 if (best_cp
== old_cp
)
6261 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6263 /* No need for further prune. */
6267 /* Prune the new candidate set. */
6268 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6269 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6270 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6271 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6273 if (compare_costs (acost
, orig_cost
) < 0)
6279 iv_ca_delta_free (&act_delta
);
6285 /* Tries to extend the sets IVS in the best possible way in order
6286 to express the USE. If ORIGINALP is true, prefer candidates from
6287 the original set of IVs, otherwise favor important candidates not
6288 based on any memory object. */
6291 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6292 struct iv_use
*use
, bool originalp
)
6294 comp_cost best_cost
, act_cost
;
6297 struct iv_cand
*cand
;
6298 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6299 struct cost_pair
*cp
;
6301 iv_ca_add_use (data
, ivs
, use
);
6302 best_cost
= iv_ca_cost (ivs
);
6303 cp
= iv_ca_cand_for_use (ivs
, use
);
6306 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6307 iv_ca_set_no_cp (data
, ivs
, use
);
6310 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6311 first try important candidates not based on any memory object. Only if
6312 this fails, try the specific ones. Rationale -- in loops with many
6313 variables the best choice often is to use just one generic biv. If we
6314 added here many ivs specific to the uses, the optimization algorithm later
6315 would be likely to get stuck in a local minimum, thus causing us to create
6316 too many ivs. The approach from few ivs to more seems more likely to be
6317 successful -- starting from few ivs, replacing an expensive use by a
6318 specific iv should always be a win. */
6319 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6321 cand
= iv_cand (data
, i
);
6323 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6326 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6329 if (iv_ca_cand_used_p (ivs
, cand
))
6332 cp
= get_use_iv_cost (data
, use
, cand
);
6336 iv_ca_set_cp (data
, ivs
, use
, cp
);
6337 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6339 iv_ca_set_no_cp (data
, ivs
, use
);
6340 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6342 if (compare_costs (act_cost
, best_cost
) < 0)
6344 best_cost
= act_cost
;
6346 iv_ca_delta_free (&best_delta
);
6347 best_delta
= act_delta
;
6350 iv_ca_delta_free (&act_delta
);
6353 if (infinite_cost_p (best_cost
))
6355 for (i
= 0; i
< use
->n_map_members
; i
++)
6357 cp
= use
->cost_map
+ i
;
6362 /* Already tried this. */
6363 if (cand
->important
)
6365 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6367 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6371 if (iv_ca_cand_used_p (ivs
, cand
))
6375 iv_ca_set_cp (data
, ivs
, use
, cp
);
6376 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6377 iv_ca_set_no_cp (data
, ivs
, use
);
6378 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6381 if (compare_costs (act_cost
, best_cost
) < 0)
6383 best_cost
= act_cost
;
6386 iv_ca_delta_free (&best_delta
);
6387 best_delta
= act_delta
;
6390 iv_ca_delta_free (&act_delta
);
6394 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6395 iv_ca_delta_free (&best_delta
);
6397 return !infinite_cost_p (best_cost
);
6400 /* Finds an initial assignment of candidates to uses. */
6402 static struct iv_ca
*
6403 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6405 struct iv_ca
*ivs
= iv_ca_new (data
);
6408 for (i
= 0; i
< n_iv_uses (data
); i
++)
6409 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6418 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6419 points to a bool variable, this function tries to break local
6420 optimal fixed-point by replacing candidates in IVS if it's true. */
6423 try_improve_iv_set (struct ivopts_data
*data
,
6424 struct iv_ca
*ivs
, bool *try_replace_p
)
6427 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6428 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6429 struct iv_cand
*cand
;
6431 /* Try extending the set of induction variables by one. */
6432 for (i
= 0; i
< n_iv_cands (data
); i
++)
6434 cand
= iv_cand (data
, i
);
6436 if (iv_ca_cand_used_p (ivs
, cand
))
6439 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6443 /* If we successfully added the candidate and the set is small enough,
6444 try optimizing it by removing other candidates. */
6445 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6447 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6448 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6449 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6450 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6453 if (compare_costs (acost
, best_cost
) < 0)
6456 iv_ca_delta_free (&best_delta
);
6457 best_delta
= act_delta
;
6460 iv_ca_delta_free (&act_delta
);
6465 /* Try removing the candidates from the set instead. */
6466 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6468 if (!best_delta
&& *try_replace_p
)
6470 *try_replace_p
= false;
6471 /* So far candidate selecting algorithm tends to choose fewer IVs
6472 so that it can handle cases in which loops have many variables
6473 but the best choice is often to use only one general biv. One
6474 weakness is it can't handle opposite cases, in which different
6475 candidates should be chosen with respect to each use. To solve
6476 the problem, we replace candidates in a manner described by the
6477 comments of iv_ca_replace, thus give general algorithm a chance
6478 to break local optimal fixed-point in these cases. */
6479 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6486 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6487 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6488 iv_ca_delta_free (&best_delta
);
6492 /* Attempts to find the optimal set of induction variables. We do simple
6493 greedy heuristic -- we try to replace at most one candidate in the selected
6494 solution and remove the unused ivs while this improves the cost. */
6496 static struct iv_ca
*
6497 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6500 bool try_replace_p
= true;
6502 /* Get the initial solution. */
6503 set
= get_initial_solution (data
, originalp
);
6506 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6507 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6511 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6513 fprintf (dump_file
, "Initial set of candidates:\n");
6514 iv_ca_dump (data
, dump_file
, set
);
6517 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6521 fprintf (dump_file
, "Improved to:\n");
6522 iv_ca_dump (data
, dump_file
, set
);
6529 static struct iv_ca
*
6530 find_optimal_iv_set (struct ivopts_data
*data
)
6533 struct iv_ca
*set
, *origset
;
6535 comp_cost cost
, origcost
;
6537 /* Determine the cost based on a strategy that starts with original IVs,
6538 and try again using a strategy that prefers candidates not based
6540 origset
= find_optimal_iv_set_1 (data
, true);
6541 set
= find_optimal_iv_set_1 (data
, false);
6543 if (!origset
&& !set
)
6546 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6547 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6549 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6551 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6552 origcost
.cost
, origcost
.complexity
);
6553 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6554 cost
.cost
, cost
.complexity
);
6557 /* Choose the one with the best cost. */
6558 if (compare_costs (origcost
, cost
) <= 0)
6565 iv_ca_free (&origset
);
6567 for (i
= 0; i
< n_iv_uses (data
); i
++)
6569 use
= iv_use (data
, i
);
6570 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6576 /* Creates a new induction variable corresponding to CAND. */
6579 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6581 gimple_stmt_iterator incr_pos
;
6591 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6595 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6603 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6607 /* Mark that the iv is preserved. */
6608 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6609 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6611 /* Rewrite the increment so that it uses var_before directly. */
6612 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6616 gimple_add_tmp_var (cand
->var_before
);
6618 base
= unshare_expr (cand
->iv
->base
);
6620 create_iv (base
, unshare_expr (cand
->iv
->step
),
6621 cand
->var_before
, data
->current_loop
,
6622 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6625 /* Creates new induction variables described in SET. */
6628 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6631 struct iv_cand
*cand
;
6634 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6636 cand
= iv_cand (data
, i
);
6637 create_new_iv (data
, cand
);
6640 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6642 fprintf (dump_file
, "Selected IV set for loop %d",
6643 data
->current_loop
->num
);
6644 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6645 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6646 LOCATION_LINE (data
->loop_loc
));
6647 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6648 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6650 cand
= iv_cand (data
, i
);
6651 dump_cand (dump_file
, cand
);
6653 fprintf (dump_file
, "\n");
6657 /* Rewrites USE (definition of iv used in a nonlinear expression)
6658 using candidate CAND. */
6661 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6662 struct iv_use
*use
, struct iv_cand
*cand
)
6667 gimple_stmt_iterator bsi
;
6669 /* An important special case -- if we are asked to express value of
6670 the original iv by itself, just exit; there is no need to
6671 introduce a new computation (that might also need casting the
6672 variable to unsigned and back). */
6673 if (cand
->pos
== IP_ORIGINAL
6674 && cand
->incremented_at
== use
->stmt
)
6676 enum tree_code stmt_code
;
6678 gcc_assert (is_gimple_assign (use
->stmt
));
6679 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6681 /* Check whether we may leave the computation unchanged.
6682 This is the case only if it does not rely on other
6683 computations in the loop -- otherwise, the computation
6684 we rely upon may be removed in remove_unused_ivs,
6685 thus leading to ICE. */
6686 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6687 if (stmt_code
== PLUS_EXPR
6688 || stmt_code
== MINUS_EXPR
6689 || stmt_code
== POINTER_PLUS_EXPR
)
6691 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6692 op
= gimple_assign_rhs2 (use
->stmt
);
6693 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6694 op
= gimple_assign_rhs1 (use
->stmt
);
6701 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6705 comp
= get_computation (data
->current_loop
, use
, cand
);
6706 gcc_assert (comp
!= NULL_TREE
);
6708 switch (gimple_code (use
->stmt
))
6711 tgt
= PHI_RESULT (use
->stmt
);
6713 /* If we should keep the biv, do not replace it. */
6714 if (name_info (data
, tgt
)->preserve_biv
)
6717 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6721 tgt
= gimple_assign_lhs (use
->stmt
);
6722 bsi
= gsi_for_stmt (use
->stmt
);
6729 if (!valid_gimple_rhs_p (comp
)
6730 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6731 /* We can't allow re-allocating the stmt as it might be pointed
6733 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6734 >= gimple_num_ops (gsi_stmt (bsi
)))))
6736 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6737 true, GSI_SAME_STMT
);
6738 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6740 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6741 /* As this isn't a plain copy we have to reset alignment
6743 if (SSA_NAME_PTR_INFO (comp
))
6744 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6748 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6750 ass
= gimple_build_assign (tgt
, comp
);
6751 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6753 bsi
= gsi_for_stmt (use
->stmt
);
6754 remove_phi_node (&bsi
, false);
6758 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6759 use
->stmt
= gsi_stmt (bsi
);
6763 /* Performs a peephole optimization to reorder the iv update statement with
6764 a mem ref to enable instruction combining in later phases. The mem ref uses
6765 the iv value before the update, so the reordering transformation requires
6766 adjustment of the offset. CAND is the selected IV_CAND.
6770 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6778 directly propagating t over to (1) will introduce overlapping live range
6779 thus increase register pressure. This peephole transform it into:
6783 t = MEM_REF (base, iv2, 8, 8);
6790 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6793 gimple iv_update
, stmt
;
6795 gimple_stmt_iterator gsi
, gsi_iv
;
6797 if (cand
->pos
!= IP_NORMAL
)
6800 var_after
= cand
->var_after
;
6801 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6803 bb
= gimple_bb (iv_update
);
6804 gsi
= gsi_last_nondebug_bb (bb
);
6805 stmt
= gsi_stmt (gsi
);
6807 /* Only handle conditional statement for now. */
6808 if (gimple_code (stmt
) != GIMPLE_COND
)
6811 gsi_prev_nondebug (&gsi
);
6812 stmt
= gsi_stmt (gsi
);
6813 if (stmt
!= iv_update
)
6816 gsi_prev_nondebug (&gsi
);
6817 if (gsi_end_p (gsi
))
6820 stmt
= gsi_stmt (gsi
);
6821 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6824 if (stmt
!= use
->stmt
)
6827 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6830 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6832 fprintf (dump_file
, "Reordering \n");
6833 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6834 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6835 fprintf (dump_file
, "\n");
6838 gsi
= gsi_for_stmt (use
->stmt
);
6839 gsi_iv
= gsi_for_stmt (iv_update
);
6840 gsi_move_before (&gsi_iv
, &gsi
);
6842 cand
->pos
= IP_BEFORE_USE
;
6843 cand
->incremented_at
= use
->stmt
;
6846 /* Rewrites USE (address that is an iv) using candidate CAND. */
6849 rewrite_use_address_1 (struct ivopts_data
*data
,
6850 struct iv_use
*use
, struct iv_cand
*cand
)
6853 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6854 tree base_hint
= NULL_TREE
;
6858 adjust_iv_update_pos (cand
, use
);
6859 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6861 unshare_aff_combination (&aff
);
6863 /* To avoid undefined overflow problems, all IV candidates use unsigned
6864 integer types. The drawback is that this makes it impossible for
6865 create_mem_ref to distinguish an IV that is based on a memory object
6866 from one that represents simply an offset.
6868 To work around this problem, we pass a hint to create_mem_ref that
6869 indicates which variable (if any) in aff is an IV based on a memory
6870 object. Note that we only consider the candidate. If this is not
6871 based on an object, the base of the reference is in some subexpression
6872 of the use -- but these will use pointer types, so they are recognized
6873 by the create_mem_ref heuristics anyway. */
6874 if (cand
->iv
->base_object
)
6875 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6877 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6878 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6879 reference_alias_ptr_type (*use
->op_p
),
6880 iv
, base_hint
, data
->speed
);
6881 copy_ref_info (ref
, *use
->op_p
);
6885 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6886 first use of a group, rewrites sub uses in the group too. */
6889 rewrite_use_address (struct ivopts_data
*data
,
6890 struct iv_use
*use
, struct iv_cand
*cand
)
6892 struct iv_use
*next
;
6894 gcc_assert (use
->sub_id
== 0);
6895 rewrite_use_address_1 (data
, use
, cand
);
6896 update_stmt (use
->stmt
);
6898 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
6900 rewrite_use_address_1 (data
, next
, cand
);
6901 update_stmt (next
->stmt
);
6907 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6911 rewrite_use_compare (struct ivopts_data
*data
,
6912 struct iv_use
*use
, struct iv_cand
*cand
)
6914 tree comp
, *var_p
, op
, bound
;
6915 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6916 enum tree_code compare
;
6917 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6923 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6924 tree var_type
= TREE_TYPE (var
);
6927 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6929 fprintf (dump_file
, "Replacing exit test: ");
6930 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6933 bound
= unshare_expr (fold_convert (var_type
, bound
));
6934 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6936 gsi_insert_seq_on_edge_immediate (
6937 loop_preheader_edge (data
->current_loop
),
6940 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6941 gimple_cond_set_lhs (cond_stmt
, var
);
6942 gimple_cond_set_code (cond_stmt
, compare
);
6943 gimple_cond_set_rhs (cond_stmt
, op
);
6947 /* The induction variable elimination failed; just express the original
6949 comp
= get_computation (data
->current_loop
, use
, cand
);
6950 gcc_assert (comp
!= NULL_TREE
);
6952 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6955 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6956 true, GSI_SAME_STMT
);
6959 /* Rewrites USE using candidate CAND. */
6962 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6966 case USE_NONLINEAR_EXPR
:
6967 rewrite_use_nonlinear_expr (data
, use
, cand
);
6971 rewrite_use_address (data
, use
, cand
);
6975 rewrite_use_compare (data
, use
, cand
);
6982 update_stmt (use
->stmt
);
6985 /* Rewrite the uses using the selected induction variables. */
6988 rewrite_uses (struct ivopts_data
*data
)
6991 struct iv_cand
*cand
;
6994 for (i
= 0; i
< n_iv_uses (data
); i
++)
6996 use
= iv_use (data
, i
);
6997 cand
= use
->selected
;
7000 rewrite_use (data
, use
, cand
);
7004 /* Removes the ivs that are not used after rewriting. */
7007 remove_unused_ivs (struct ivopts_data
*data
)
7011 bitmap toremove
= BITMAP_ALLOC (NULL
);
7013 /* Figure out an order in which to release SSA DEFs so that we don't
7014 release something that we'd have to propagate into a debug stmt
7016 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7018 struct version_info
*info
;
7020 info
= ver_info (data
, j
);
7022 && !integer_zerop (info
->iv
->step
)
7024 && !info
->iv
->have_use_for
7025 && !info
->preserve_biv
)
7027 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7029 tree def
= info
->iv
->ssa_name
;
7031 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7033 imm_use_iterator imm_iter
;
7034 use_operand_p use_p
;
7038 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7040 if (!gimple_debug_bind_p (stmt
))
7043 /* We just want to determine whether to do nothing
7044 (count == 0), to substitute the computed
7045 expression into a single use of the SSA DEF by
7046 itself (count == 1), or to use a debug temp
7047 because the SSA DEF is used multiple times or as
7048 part of a larger expression (count > 1). */
7050 if (gimple_debug_bind_get_value (stmt
) != def
)
7054 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7060 struct iv_use dummy_use
;
7061 struct iv_cand
*best_cand
= NULL
, *cand
;
7062 unsigned i
, best_pref
= 0, cand_pref
;
7064 memset (&dummy_use
, 0, sizeof (dummy_use
));
7065 dummy_use
.iv
= info
->iv
;
7066 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7068 cand
= iv_use (data
, i
)->selected
;
7069 if (cand
== best_cand
)
7071 cand_pref
= operand_equal_p (cand
->iv
->step
,
7075 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7076 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7079 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7081 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7084 best_pref
= cand_pref
;
7091 tree comp
= get_computation_at (data
->current_loop
,
7092 &dummy_use
, best_cand
,
7093 SSA_NAME_DEF_STMT (def
));
7099 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7100 DECL_ARTIFICIAL (vexpr
) = 1;
7101 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7102 if (SSA_NAME_VAR (def
))
7103 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7105 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7107 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7108 gimple_stmt_iterator gsi
;
7110 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7111 gsi
= gsi_after_labels (gimple_bb
7112 (SSA_NAME_DEF_STMT (def
)));
7114 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7116 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7120 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7122 if (!gimple_debug_bind_p (stmt
))
7125 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7126 SET_USE (use_p
, comp
);
7134 release_defs_bitset (toremove
);
7136 BITMAP_FREE (toremove
);
7139 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7140 for hash_map::traverse. */
7143 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7149 /* Frees data allocated by the optimization of a single loop. */
7152 free_loop_data (struct ivopts_data
*data
)
7160 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7161 delete data
->niters
;
7162 data
->niters
= NULL
;
7165 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7167 struct version_info
*info
;
7169 info
= ver_info (data
, i
);
7171 info
->has_nonlin_use
= false;
7172 info
->preserve_biv
= false;
7175 bitmap_clear (data
->relevant
);
7176 bitmap_clear (data
->important_candidates
);
7178 for (i
= 0; i
< n_iv_uses (data
); i
++)
7180 struct iv_use
*use
= iv_use (data
, i
);
7181 struct iv_use
*pre
= use
, *sub
= use
->next
;
7185 gcc_assert (sub
->related_cands
== NULL
);
7186 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7193 BITMAP_FREE (use
->related_cands
);
7194 for (j
= 0; j
< use
->n_map_members
; j
++)
7195 if (use
->cost_map
[j
].depends_on
)
7196 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7197 free (use
->cost_map
);
7200 data
->iv_uses
.truncate (0);
7202 for (i
= 0; i
< n_iv_cands (data
); i
++)
7204 struct iv_cand
*cand
= iv_cand (data
, i
);
7206 if (cand
->depends_on
)
7207 BITMAP_FREE (cand
->depends_on
);
7210 data
->iv_candidates
.truncate (0);
7212 if (data
->version_info_size
< num_ssa_names
)
7214 data
->version_info_size
= 2 * num_ssa_names
;
7215 free (data
->version_info
);
7216 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7219 data
->max_inv_id
= 0;
7221 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7222 SET_DECL_RTL (obj
, NULL_RTX
);
7224 decl_rtl_to_reset
.truncate (0);
7226 data
->inv_expr_tab
->empty ();
7227 data
->inv_expr_id
= 0;
7230 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7234 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7236 free_loop_data (data
);
7237 free (data
->version_info
);
7238 BITMAP_FREE (data
->relevant
);
7239 BITMAP_FREE (data
->important_candidates
);
7241 decl_rtl_to_reset
.release ();
7242 data
->iv_uses
.release ();
7243 data
->iv_candidates
.release ();
7244 delete data
->inv_expr_tab
;
7245 data
->inv_expr_tab
= NULL
;
7246 free_affine_expand_cache (&data
->name_expansion_cache
);
7247 obstack_free (&data
->iv_obstack
, NULL
);
7250 /* Returns true if the loop body BODY includes any function calls. */
7253 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7255 gimple_stmt_iterator gsi
;
7258 for (i
= 0; i
< num_nodes
; i
++)
7259 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7261 gimple stmt
= gsi_stmt (gsi
);
7262 if (is_gimple_call (stmt
)
7263 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7269 /* Optimizes the LOOP. Returns true if anything changed. */
7272 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7274 bool changed
= false;
7275 struct iv_ca
*iv_ca
;
7276 edge exit
= single_dom_exit (loop
);
7279 gcc_assert (!data
->niters
);
7280 data
->current_loop
= loop
;
7281 data
->loop_loc
= find_loop_location (loop
);
7282 data
->speed
= optimize_loop_for_speed_p (loop
);
7284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7286 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7287 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7288 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7289 LOCATION_LINE (data
->loop_loc
));
7290 fprintf (dump_file
, "\n");
7294 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7295 exit
->src
->index
, exit
->dest
->index
);
7296 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7297 fprintf (dump_file
, "\n");
7300 fprintf (dump_file
, "\n");
7303 body
= get_loop_body (loop
);
7304 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7305 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7308 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7310 /* For each ssa name determines whether it behaves as an induction variable
7312 if (!find_induction_variables (data
))
7315 /* Finds interesting uses (item 1). */
7316 find_interesting_uses (data
);
7317 group_address_uses (data
);
7318 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7321 /* Finds candidates for the induction variables (item 2). */
7322 find_iv_candidates (data
);
7324 /* Calculates the costs (item 3, part 1). */
7325 determine_iv_costs (data
);
7326 determine_use_iv_costs (data
);
7327 determine_set_costs (data
);
7329 /* Find the optimal set of induction variables (item 3, part 2). */
7330 iv_ca
= find_optimal_iv_set (data
);
7335 /* Create the new induction variables (item 4, part 1). */
7336 create_new_ivs (data
, iv_ca
);
7337 iv_ca_free (&iv_ca
);
7339 /* Rewrite the uses (item 4, part 2). */
7340 rewrite_uses (data
);
7342 /* Remove the ivs that are unused after rewriting. */
7343 remove_unused_ivs (data
);
7345 /* We have changed the structure of induction variables; it might happen
7346 that definitions in the scev database refer to some of them that were
7351 free_loop_data (data
);
7356 /* Main entry point. Optimizes induction variables in loops. */
7359 tree_ssa_iv_optimize (void)
7362 struct ivopts_data data
;
7364 tree_ssa_iv_optimize_init (&data
);
7366 /* Optimize the loops starting with the innermost ones. */
7367 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7370 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7372 tree_ssa_iv_optimize_loop (&data
, loop
);
7375 tree_ssa_iv_optimize_finalize (&data
);