1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
90 #include "insn-config.h"
100 #include "tree-ssa.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop
*loop
)
132 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
134 return AVG_LOOP_NITER (loop
);
139 /* Representation of the induction variable. */
142 tree base
; /* Initial value of the iv. */
143 tree base_object
; /* A memory object to that the induction variable points. */
144 tree step
; /* Step of the iv (constant only). */
145 tree ssa_name
; /* The ssa name with the value. */
146 unsigned use_id
; /* The identifier in the use if it is the case. */
147 bool biv_p
; /* Is it a biv? */
148 bool have_use_for
; /* Do we already have a use for it? */
149 bool no_overflow
; /* True if the iv doesn't overflow. */
150 bool have_address_use
;/* For biv, indicate if it's used in any address
154 /* Per-ssa version information (induction variable descriptions, etc.). */
157 tree name
; /* The ssa name. */
158 struct iv
*iv
; /* Induction variable description. */
159 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv
; /* For the original biv, whether to preserve it. */
162 unsigned inv_id
; /* Id of an invariant. */
168 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
169 USE_ADDRESS
, /* Use in an address. */
170 USE_COMPARE
/* Use is a compare. */
173 /* Cost of a computation. */
176 int cost
; /* The runtime cost. */
177 unsigned complexity
; /* The estimate of the complexity of the code for
178 the computation (in no concrete units --
179 complexity field should be larger for more
180 complex expressions and addressing modes). */
183 static const comp_cost no_cost
= {0, 0};
184 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
186 /* The candidate - cost pair. */
189 struct iv_cand
*cand
; /* The candidate. */
190 comp_cost cost
; /* The cost. */
191 bitmap depends_on
; /* The list of invariants that have to be
193 tree value
; /* For final value elimination, the expression for
194 the final value of the iv. For iv elimination,
195 the new bound to compare with. */
196 enum tree_code comp
; /* For iv elimination, the comparison. */
197 int inv_expr_id
; /* Loop invariant expression id. */
203 unsigned id
; /* The id of the use. */
204 unsigned sub_id
; /* The id of the sub use. */
205 enum use_type type
; /* Type of the use. */
206 struct iv
*iv
; /* The induction variable it is based on. */
207 gimple
*stmt
; /* Statement in that it occurs. */
208 tree
*op_p
; /* The place where it occurs. */
209 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
212 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
213 struct cost_pair
*cost_map
;
214 /* The costs wrto the iv candidates. */
216 struct iv_cand
*selected
;
217 /* The selected candidate. */
219 struct iv_use
*next
; /* The next sub use. */
220 tree addr_base
; /* Base address with const offset stripped. */
221 unsigned HOST_WIDE_INT addr_offset
;
222 /* Const offset stripped from base address. */
225 /* The position where the iv is computed. */
228 IP_NORMAL
, /* At the end, just before the exit condition. */
229 IP_END
, /* At the end of the latch block. */
230 IP_BEFORE_USE
, /* Immediately before a specific use. */
231 IP_AFTER_USE
, /* Immediately after a specific use. */
232 IP_ORIGINAL
/* The original biv. */
235 /* The induction variable candidate. */
238 unsigned id
; /* The number of the candidate. */
239 bool important
; /* Whether this is an "important" candidate, i.e. such
240 that it should be considered by all uses. */
241 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
242 gimple
*incremented_at
;/* For original biv, the statement where it is
244 tree var_before
; /* The variable used for it before increment. */
245 tree var_after
; /* The variable used for it after increment. */
246 struct iv
*iv
; /* The value of the candidate. NULL for
247 "pseudocandidate" used to indicate the possibility
248 to replace the final value of an iv by direct
249 computation of the value. */
250 unsigned cost
; /* Cost of the candidate. */
251 unsigned cost_step
; /* Cost of the candidate's increment operation. */
252 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
253 where it is incremented. */
254 bitmap depends_on
; /* The list of invariants that are used in step of the
256 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
260 /* Loop invariant expression hashtable entry. */
261 struct iv_inv_expr_ent
268 /* The data used by the induction variable optimizations. */
270 /* Hashtable helpers. */
272 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
274 static inline hashval_t
hash (const iv_inv_expr_ent
*);
275 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
278 /* Hash function for loop invariant expressions. */
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
286 /* Hash table equality function for expressions. */
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
290 const iv_inv_expr_ent
*expr2
)
292 return expr1
->hash
== expr2
->hash
293 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
298 /* The currently optimized loop. */
299 struct loop
*current_loop
;
300 source_location loop_loc
;
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map
<edge
, tree_niter_desc
*> *niters
;
305 /* Number of registers used in it. */
308 /* The size of version_info array allocated. */
309 unsigned version_info_size
;
311 /* The array of information for the ssa names. */
312 struct version_info
*version_info
;
314 /* The hashtable of loop invariant expressions created
316 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
318 /* Loop invariant expression id. */
321 /* The bitmap of indices in version_info whose value was changed. */
324 /* The uses of induction variables. */
325 vec
<iv_use
*> iv_uses
;
327 /* The candidates. */
328 vec
<iv_cand
*> iv_candidates
;
330 /* A bitmap of important candidates. */
331 bitmap important_candidates
;
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
336 /* The maximum invariant id. */
339 /* Number of no_overflow BIVs which are not used in memory address. */
340 unsigned bivs_not_used_in_addr
;
342 /* Obstack for iv structure. */
343 struct obstack iv_obstack
;
345 /* Whether to consider just related and important candidates when replacing a
347 bool consider_all_candidates
;
349 /* Are we optimizing for speed? */
352 /* Whether the loop body includes any function calls. */
353 bool body_includes_call
;
355 /* Whether the loop body can only be exited via single exit. */
356 bool loop_single_exit_p
;
359 /* An assignment of iv candidates to uses. */
363 /* The number of uses covered by the assignment. */
366 /* Number of uses that cannot be expressed by the candidates in the set. */
369 /* Candidate assigned to a use, together with the related costs. */
370 struct cost_pair
**cand_for_use
;
372 /* Number of times each candidate is used. */
373 unsigned *n_cand_uses
;
375 /* The candidates used. */
378 /* The number of candidates in the set. */
381 /* Total number of registers needed. */
384 /* Total cost of expressing uses. */
385 comp_cost cand_use_cost
;
387 /* Total cost of candidates. */
390 /* Number of times each invariant is used. */
391 unsigned *n_invariant_uses
;
393 /* The array holding the number of uses of each loop
394 invariant expressions created by ivopt. */
395 unsigned *used_inv_expr
;
397 /* The number of created loop invariants. */
398 unsigned num_used_inv_expr
;
400 /* Total cost of the assignment. */
404 /* Difference of two iv candidate assignments. */
411 /* An old assignment (for rollback purposes). */
412 struct cost_pair
*old_cp
;
414 /* A new assignment. */
415 struct cost_pair
*new_cp
;
417 /* Next change in the list. */
418 struct iv_ca_delta
*next_change
;
421 /* Bound on number of candidates below that all candidates are considered. */
423 #define CONSIDER_ALL_CANDIDATES_BOUND \
424 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
426 /* If there are more iv occurrences, we just give up (it is quite unlikely that
427 optimizing such a loop would help, and it would take ages). */
429 #define MAX_CONSIDERED_USES \
430 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
432 /* If there are at most this number of ivs in the set, try removing unnecessary
433 ivs from the set always. */
435 #define ALWAYS_PRUNE_CAND_SET_BOUND \
436 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
438 /* The list of trees for that the decl_rtl field must be reset is stored
441 static vec
<tree
> decl_rtl_to_reset
;
443 static comp_cost
force_expr_to_var_cost (tree
, bool);
445 /* Number of uses recorded in DATA. */
447 static inline unsigned
448 n_iv_uses (struct ivopts_data
*data
)
450 return data
->iv_uses
.length ();
453 /* Ith use recorded in DATA. */
455 static inline struct iv_use
*
456 iv_use (struct ivopts_data
*data
, unsigned i
)
458 return data
->iv_uses
[i
];
461 /* Number of candidates recorded in DATA. */
463 static inline unsigned
464 n_iv_cands (struct ivopts_data
*data
)
466 return data
->iv_candidates
.length ();
469 /* Ith candidate recorded in DATA. */
471 static inline struct iv_cand
*
472 iv_cand (struct ivopts_data
*data
, unsigned i
)
474 return data
->iv_candidates
[i
];
477 /* The single loop exit if it dominates the latch, NULL otherwise. */
480 single_dom_exit (struct loop
*loop
)
482 edge exit
= single_exit (loop
);
487 if (!just_once_each_iteration_p (loop
, exit
->src
))
493 /* Dumps information about the induction variable IV to FILE. */
496 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
498 if (iv
->ssa_name
&& dump_name
)
500 fprintf (file
, "ssa name ");
501 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
502 fprintf (file
, "\n");
505 fprintf (file
, " type ");
506 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
507 fprintf (file
, "\n");
511 fprintf (file
, " base ");
512 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
513 fprintf (file
, "\n");
515 fprintf (file
, " step ");
516 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
517 fprintf (file
, "\n");
521 fprintf (file
, " invariant ");
522 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
523 fprintf (file
, "\n");
528 fprintf (file
, " base object ");
529 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
530 fprintf (file
, "\n");
534 fprintf (file
, " is a biv\n");
537 fprintf (file
, " iv doesn't overflow wrto loop niter\n");
540 /* Dumps information about the USE to FILE. */
543 dump_use (FILE *file
, struct iv_use
*use
)
545 fprintf (file
, "use %d", use
->id
);
547 fprintf (file
, ".%d", use
->sub_id
);
549 fprintf (file
, "\n");
553 case USE_NONLINEAR_EXPR
:
554 fprintf (file
, " generic\n");
558 fprintf (file
, " address\n");
562 fprintf (file
, " compare\n");
569 fprintf (file
, " in statement ");
570 print_gimple_stmt (file
, use
->stmt
, 0, 0);
571 fprintf (file
, "\n");
573 fprintf (file
, " at position ");
575 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
576 fprintf (file
, "\n");
578 dump_iv (file
, use
->iv
, false);
580 if (use
->related_cands
)
582 fprintf (file
, " related candidates ");
583 dump_bitmap (file
, use
->related_cands
);
587 /* Dumps information about the uses to FILE. */
590 dump_uses (FILE *file
, struct ivopts_data
*data
)
595 for (i
= 0; i
< n_iv_uses (data
); i
++)
597 use
= iv_use (data
, i
);
600 dump_use (file
, use
);
604 fprintf (file
, "\n");
608 /* Dumps information about induction variable candidate CAND to FILE. */
611 dump_cand (FILE *file
, struct iv_cand
*cand
)
613 struct iv
*iv
= cand
->iv
;
615 fprintf (file
, "candidate %d%s\n",
616 cand
->id
, cand
->important
? " (important)" : "");
618 if (cand
->depends_on
)
620 fprintf (file
, " depends on ");
621 dump_bitmap (file
, cand
->depends_on
);
626 fprintf (file
, " final value replacement\n");
630 if (cand
->var_before
)
632 fprintf (file
, " var_before ");
633 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
634 fprintf (file
, "\n");
638 fprintf (file
, " var_after ");
639 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
640 fprintf (file
, "\n");
646 fprintf (file
, " incremented before exit test\n");
650 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
654 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
658 fprintf (file
, " incremented at end\n");
662 fprintf (file
, " original biv\n");
666 dump_iv (file
, iv
, false);
669 /* Returns the info for ssa version VER. */
671 static inline struct version_info
*
672 ver_info (struct ivopts_data
*data
, unsigned ver
)
674 return data
->version_info
+ ver
;
677 /* Returns the info for ssa name NAME. */
679 static inline struct version_info
*
680 name_info (struct ivopts_data
*data
, tree name
)
682 return ver_info (data
, SSA_NAME_VERSION (name
));
685 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
689 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
691 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
695 if (sbb
== loop
->latch
)
701 return stmt
== last_stmt (bb
);
704 /* Returns true if STMT if after the place where the original induction
705 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
706 if the positions are identical. */
709 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
711 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
712 basic_block stmt_bb
= gimple_bb (stmt
);
714 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
717 if (stmt_bb
!= cand_bb
)
721 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
723 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
726 /* Returns true if STMT if after the place where the induction variable
727 CAND is incremented in LOOP. */
730 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
738 return stmt_after_ip_normal_pos (loop
, stmt
);
742 return stmt_after_inc_pos (cand
, stmt
, false);
745 return stmt_after_inc_pos (cand
, stmt
, true);
752 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
755 abnormal_ssa_name_p (tree exp
)
760 if (TREE_CODE (exp
) != SSA_NAME
)
763 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
766 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
767 abnormal phi node. Callback for for_each_index. */
770 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
771 void *data ATTRIBUTE_UNUSED
)
773 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
775 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
777 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
781 return !abnormal_ssa_name_p (*index
);
784 /* Returns true if EXPR contains a ssa name that occurs in an
785 abnormal phi node. */
788 contains_abnormal_ssa_name_p (tree expr
)
791 enum tree_code_class codeclass
;
796 code
= TREE_CODE (expr
);
797 codeclass
= TREE_CODE_CLASS (code
);
799 if (code
== SSA_NAME
)
800 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
802 if (code
== INTEGER_CST
803 || is_gimple_min_invariant (expr
))
806 if (code
== ADDR_EXPR
)
807 return !for_each_index (&TREE_OPERAND (expr
, 0),
808 idx_contains_abnormal_ssa_name_p
,
811 if (code
== COND_EXPR
)
812 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
813 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
814 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
820 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
825 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
837 /* Returns the structure describing number of iterations determined from
838 EXIT of DATA->current_loop, or NULL if something goes wrong. */
840 static struct tree_niter_desc
*
841 niter_for_exit (struct ivopts_data
*data
, edge exit
)
843 struct tree_niter_desc
*desc
;
844 tree_niter_desc
**slot
;
848 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
852 slot
= data
->niters
->get (exit
);
856 /* Try to determine number of iterations. We cannot safely work with ssa
857 names that appear in phi nodes on abnormal edges, so that we do not
858 create overlapping life ranges for them (PR 27283). */
859 desc
= XNEW (struct tree_niter_desc
);
860 if (!number_of_iterations_exit (data
->current_loop
,
862 || contains_abnormal_ssa_name_p (desc
->niter
))
867 data
->niters
->put (exit
, desc
);
875 /* Returns the structure describing number of iterations determined from
876 single dominating exit of DATA->current_loop, or NULL if something
879 static struct tree_niter_desc
*
880 niter_for_single_dom_exit (struct ivopts_data
*data
)
882 edge exit
= single_dom_exit (data
->current_loop
);
887 return niter_for_exit (data
, exit
);
890 /* Initializes data structures used by the iv optimization pass, stored
894 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
896 data
->version_info_size
= 2 * num_ssa_names
;
897 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
898 data
->relevant
= BITMAP_ALLOC (NULL
);
899 data
->important_candidates
= BITMAP_ALLOC (NULL
);
900 data
->max_inv_id
= 0;
902 data
->iv_uses
.create (20);
903 data
->iv_candidates
.create (20);
904 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
905 data
->inv_expr_id
= 0;
906 data
->name_expansion_cache
= NULL
;
907 decl_rtl_to_reset
.create (20);
908 gcc_obstack_init (&data
->iv_obstack
);
911 /* Returns a memory object to that EXPR points. In case we are able to
912 determine that it does not point to any such object, NULL is returned. */
915 determine_base_object (tree expr
)
917 enum tree_code code
= TREE_CODE (expr
);
920 /* If this is a pointer casted to any type, we need to determine
921 the base object for the pointer; so handle conversions before
922 throwing away non-pointer expressions. */
923 if (CONVERT_EXPR_P (expr
))
924 return determine_base_object (TREE_OPERAND (expr
, 0));
926 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
935 obj
= TREE_OPERAND (expr
, 0);
936 base
= get_base_address (obj
);
941 if (TREE_CODE (base
) == MEM_REF
)
942 return determine_base_object (TREE_OPERAND (base
, 0));
944 return fold_convert (ptr_type_node
,
945 build_fold_addr_expr (base
));
947 case POINTER_PLUS_EXPR
:
948 return determine_base_object (TREE_OPERAND (expr
, 0));
952 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
956 return fold_convert (ptr_type_node
, expr
);
960 /* Return true if address expression with non-DECL_P operand appears
964 contain_complex_addr_expr (tree expr
)
969 switch (TREE_CODE (expr
))
971 case POINTER_PLUS_EXPR
:
974 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
975 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
979 return (!DECL_P (TREE_OPERAND (expr
, 0)));
988 /* Allocates an induction variable with given initial value BASE and step STEP
989 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
992 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
993 bool no_overflow
= false)
996 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
998 gcc_assert (step
!= NULL_TREE
);
1000 /* Lower address expression in base except ones with DECL_P as operand.
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1006 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1007 || contain_complex_addr_expr (expr
))
1010 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1011 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1015 iv
->base_object
= determine_base_object (base
);
1018 iv
->have_use_for
= false;
1020 iv
->ssa_name
= NULL_TREE
;
1021 iv
->no_overflow
= no_overflow
;
1022 iv
->have_address_use
= false;
1027 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1028 doesn't overflow. */
1031 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1034 struct version_info
*info
= name_info (data
, iv
);
1036 gcc_assert (!info
->iv
);
1038 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1039 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1040 info
->iv
->ssa_name
= iv
;
1043 /* Finds induction variable declaration for VAR. */
1046 get_iv (struct ivopts_data
*data
, tree var
)
1049 tree type
= TREE_TYPE (var
);
1051 if (!POINTER_TYPE_P (type
)
1052 && !INTEGRAL_TYPE_P (type
))
1055 if (!name_info (data
, var
)->iv
)
1057 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1060 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1061 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1064 return name_info (data
, var
)->iv
;
1067 /* Return the first non-invariant ssa var found in EXPR. */
1070 extract_single_var_from_expr (tree expr
)
1074 enum tree_code code
;
1076 if (!expr
|| is_gimple_min_invariant (expr
))
1079 code
= TREE_CODE (expr
);
1080 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1082 n
= TREE_OPERAND_LENGTH (expr
);
1083 for (i
= 0; i
< n
; i
++)
1085 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1091 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1094 /* Finds basic ivs. */
1097 find_bivs (struct ivopts_data
*data
)
1101 tree step
, type
, base
, stop
;
1103 struct loop
*loop
= data
->current_loop
;
1106 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1110 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1113 if (virtual_operand_p (PHI_RESULT (phi
)))
1116 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1119 if (integer_zerop (iv
.step
))
1123 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1124 /* Stop expanding iv base at the first ssa var referred by iv step.
1125 Ideally we should stop at any ssa var, because that's expensive
1126 and unusual to happen, we just do it on the first one.
1128 See PR64705 for the rationale. */
1129 stop
= extract_single_var_from_expr (step
);
1130 base
= expand_simple_operations (base
, stop
);
1131 if (contains_abnormal_ssa_name_p (base
)
1132 || contains_abnormal_ssa_name_p (step
))
1135 type
= TREE_TYPE (PHI_RESULT (phi
));
1136 base
= fold_convert (type
, base
);
1139 if (POINTER_TYPE_P (type
))
1140 step
= convert_to_ptrofftype (step
);
1142 step
= fold_convert (type
, step
);
1145 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1152 /* Marks basic ivs. */
1155 mark_bivs (struct ivopts_data
*data
)
1160 struct iv
*iv
, *incr_iv
;
1161 struct loop
*loop
= data
->current_loop
;
1162 basic_block incr_bb
;
1165 data
->bivs_not_used_in_addr
= 0;
1166 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1170 iv
= get_iv (data
, PHI_RESULT (phi
));
1174 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1175 def
= SSA_NAME_DEF_STMT (var
);
1176 /* Don't mark iv peeled from other one as biv. */
1178 && gimple_code (def
) == GIMPLE_PHI
1179 && gimple_bb (def
) == loop
->header
)
1182 incr_iv
= get_iv (data
, var
);
1186 /* If the increment is in the subloop, ignore it. */
1187 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1188 if (incr_bb
->loop_father
!= data
->current_loop
1189 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1193 incr_iv
->biv_p
= true;
1194 if (iv
->no_overflow
)
1195 data
->bivs_not_used_in_addr
++;
1196 if (incr_iv
->no_overflow
)
1197 data
->bivs_not_used_in_addr
++;
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202 parameters to IV. */
1205 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1208 struct loop
*loop
= data
->current_loop
;
1210 iv
->base
= NULL_TREE
;
1211 iv
->step
= NULL_TREE
;
1213 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1216 lhs
= gimple_assign_lhs (stmt
);
1217 if (TREE_CODE (lhs
) != SSA_NAME
)
1220 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1223 /* Stop expanding iv base at the first ssa var referred by iv step.
1224 Ideally we should stop at any ssa var, because that's expensive
1225 and unusual to happen, we just do it on the first one.
1227 See PR64705 for the rationale. */
1228 stop
= extract_single_var_from_expr (iv
->step
);
1229 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1230 if (contains_abnormal_ssa_name_p (iv
->base
)
1231 || contains_abnormal_ssa_name_p (iv
->step
))
1234 /* If STMT could throw, then do not consider STMT as defining a GIV.
1235 While this will suppress optimizations, we can not safely delete this
1236 GIV and associated statements, even if it appears it is not used. */
1237 if (stmt_could_throw_p (stmt
))
1243 /* Finds general ivs in statement STMT. */
1246 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1250 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1253 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1256 /* Finds general ivs in basic block BB. */
1259 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1261 gimple_stmt_iterator bsi
;
1263 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1264 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1267 /* Finds general ivs. */
1270 find_givs (struct ivopts_data
*data
)
1272 struct loop
*loop
= data
->current_loop
;
1273 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1276 for (i
= 0; i
< loop
->num_nodes
; i
++)
1277 find_givs_in_bb (data
, body
[i
]);
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282 variable and if so, its initial value and step. */
1285 find_induction_variables (struct ivopts_data
*data
)
1290 if (!find_bivs (data
))
1296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1298 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1302 fprintf (dump_file
, " number of iterations ");
1303 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1304 if (!integer_zerop (niter
->may_be_zero
))
1306 fprintf (dump_file
, "; zero if ");
1307 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1309 fprintf (dump_file
, "\n\n");
1312 fprintf (dump_file
, "Induction variables:\n\n");
1314 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1316 if (ver_info (data
, i
)->iv
)
1317 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326 is the const offset stripped from IV base. For uses of other types,
1327 ADDR_BASE and ADDR_OFFSET are zero by default. */
1329 static struct iv_use
*
1330 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1331 gimple
*stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1332 unsigned HOST_WIDE_INT addr_offset
= 0)
1334 struct iv_use
*use
= XCNEW (struct iv_use
);
1336 use
->id
= n_iv_uses (data
);
1338 use
->type
= use_type
;
1342 use
->related_cands
= BITMAP_ALLOC (NULL
);
1344 use
->addr_base
= addr_base
;
1345 use
->addr_offset
= addr_offset
;
1347 data
->iv_uses
.safe_push (use
);
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353 The sub use is recorded under the one whose use id is ID_GROUP. */
1355 static struct iv_use
*
1356 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1357 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
,
1358 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1359 unsigned int id_group
)
1361 struct iv_use
*use
= XCNEW (struct iv_use
);
1362 struct iv_use
*group
= iv_use (data
, id_group
);
1364 use
->id
= group
->id
;
1366 use
->type
= use_type
;
1370 use
->related_cands
= NULL
;
1371 use
->addr_base
= addr_base
;
1372 use
->addr_offset
= addr_offset
;
1374 /* Sub use list is maintained in offset ascending order. */
1375 if (addr_offset
<= group
->addr_offset
)
1377 use
->related_cands
= group
->related_cands
;
1378 group
->related_cands
= NULL
;
1380 data
->iv_uses
[id_group
] = use
;
1388 group
= group
->next
;
1390 while (group
&& addr_offset
> group
->addr_offset
);
1391 use
->next
= pre
->next
;
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399 NONLINEAR_USE is true if the invariant is used in a way we do not
1400 handle specially. */
1403 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1406 struct version_info
*info
;
1408 if (TREE_CODE (op
) != SSA_NAME
1409 || virtual_operand_p (op
))
1412 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1414 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1417 info
= name_info (data
, op
);
1419 info
->has_nonlin_use
|= nonlinear_use
;
1421 info
->inv_id
= ++data
->max_inv_id
;
1422 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1425 /* Checks whether the use OP is interesting and if so, records it. */
1427 static struct iv_use
*
1428 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1434 if (TREE_CODE (op
) != SSA_NAME
)
1437 iv
= get_iv (data
, op
);
1441 if (iv
->have_use_for
)
1443 use
= iv_use (data
, iv
->use_id
);
1445 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1449 if (integer_zerop (iv
->step
))
1451 record_invariant (data
, op
, true);
1454 iv
->have_use_for
= true;
1456 stmt
= SSA_NAME_DEF_STMT (op
);
1457 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1458 || is_gimple_assign (stmt
));
1460 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1461 iv
->use_id
= use
->id
;
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467 of an induction variable and an invariant. If this is the case,
1468 CONTROL_VAR is set to location of the iv, BOUND to the location of
1469 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470 induction variable descriptions, and true is returned. If this is not
1471 the case, CONTROL_VAR and BOUND are set to the arguments of the
1472 condition and false is returned. */
1475 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1476 tree
**control_var
, tree
**bound
,
1477 struct iv
**iv_var
, struct iv
**iv_bound
)
1479 /* The objects returned when COND has constant operands. */
1480 static struct iv const_iv
;
1482 tree
*op0
= &zero
, *op1
= &zero
;
1483 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1486 if (gimple_code (stmt
) == GIMPLE_COND
)
1488 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1489 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1490 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1494 op0
= gimple_assign_rhs1_ptr (stmt
);
1495 op1
= gimple_assign_rhs2_ptr (stmt
);
1498 zero
= integer_zero_node
;
1499 const_iv
.step
= integer_zero_node
;
1501 if (TREE_CODE (*op0
) == SSA_NAME
)
1502 iv0
= get_iv (data
, *op0
);
1503 if (TREE_CODE (*op1
) == SSA_NAME
)
1504 iv1
= get_iv (data
, *op1
);
1506 /* Exactly one of the compared values must be an iv, and the other one must
1511 if (integer_zerop (iv0
->step
))
1513 /* Control variable may be on the other side. */
1514 std::swap (op0
, op1
);
1515 std::swap (iv0
, iv1
);
1517 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1532 /* Checks whether the condition in STMT is interesting and if so,
1536 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1538 tree
*var_p
, *bound_p
;
1541 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1543 find_interesting_uses_op (data
, *var_p
);
1544 find_interesting_uses_op (data
, *bound_p
);
1548 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1551 /* Returns the outermost loop EXPR is obviously invariant in
1552 relative to the loop LOOP, i.e. if all its operands are defined
1553 outside of the returned loop. Returns NULL if EXPR is not
1554 even obviously invariant in LOOP. */
1557 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1562 if (is_gimple_min_invariant (expr
))
1563 return current_loops
->tree_root
;
1565 if (TREE_CODE (expr
) == SSA_NAME
)
1567 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1570 if (flow_bb_inside_loop_p (loop
, def_bb
))
1572 return superloop_at_depth (loop
,
1573 loop_depth (def_bb
->loop_father
) + 1);
1576 return current_loops
->tree_root
;
1582 unsigned maxdepth
= 0;
1583 len
= TREE_OPERAND_LENGTH (expr
);
1584 for (i
= 0; i
< len
; i
++)
1586 struct loop
*ivloop
;
1587 if (!TREE_OPERAND (expr
, i
))
1590 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1593 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1596 return superloop_at_depth (loop
, maxdepth
);
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600 i.e. if all its operands are defined outside of the LOOP. LOOP
1601 should not be the function body. */
1604 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1609 gcc_assert (loop_depth (loop
) > 0);
1611 if (is_gimple_min_invariant (expr
))
1614 if (TREE_CODE (expr
) == SSA_NAME
)
1616 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1618 && flow_bb_inside_loop_p (loop
, def_bb
))
1627 len
= TREE_OPERAND_LENGTH (expr
);
1628 for (i
= 0; i
< len
; i
++)
1629 if (TREE_OPERAND (expr
, i
)
1630 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1636 /* Given expression EXPR which computes inductive values with respect
1637 to loop recorded in DATA, this function returns biv from which EXPR
1638 is derived by tracing definition chains of ssa variables in EXPR. */
1641 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1646 enum tree_code code
;
1649 if (expr
== NULL_TREE
)
1652 if (is_gimple_min_invariant (expr
))
1655 code
= TREE_CODE (expr
);
1656 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1658 n
= TREE_OPERAND_LENGTH (expr
);
1659 for (i
= 0; i
< n
; i
++)
1661 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1667 /* Stop if it's not ssa name. */
1668 if (code
!= SSA_NAME
)
1671 iv
= get_iv (data
, expr
);
1672 if (!iv
|| integer_zerop (iv
->step
))
1677 stmt
= SSA_NAME_DEF_STMT (expr
);
1678 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1681 use_operand_p use_p
;
1683 if (virtual_operand_p (gimple_phi_result (phi
)))
1686 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1688 tree use
= USE_FROM_PTR (use_p
);
1689 iv
= find_deriving_biv_for_expr (data
, use
);
1695 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1698 e1
= gimple_assign_rhs1 (stmt
);
1699 code
= gimple_assign_rhs_code (stmt
);
1700 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1701 return find_deriving_biv_for_expr (data
, e1
);
1708 case POINTER_PLUS_EXPR
:
1709 /* Increments, decrements and multiplications by a constant
1711 e2
= gimple_assign_rhs2 (stmt
);
1712 iv
= find_deriving_biv_for_expr (data
, e2
);
1718 /* Casts are simple. */
1719 return find_deriving_biv_for_expr (data
, e1
);
1728 /* Record BIV, its predecessor and successor that they are used in
1729 address type uses. */
1732 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1735 tree type
, base_1
, base_2
;
1738 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1739 || biv
->have_address_use
|| !biv
->no_overflow
)
1742 type
= TREE_TYPE (biv
->base
);
1743 if (!INTEGRAL_TYPE_P (type
))
1746 biv
->have_address_use
= true;
1747 data
->bivs_not_used_in_addr
--;
1748 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1749 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1751 struct iv
*iv
= ver_info (data
, i
)->iv
;
1753 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1754 || iv
->have_address_use
|| !iv
->no_overflow
)
1757 if (type
!= TREE_TYPE (iv
->base
)
1758 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1761 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1764 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
1765 if (operand_equal_p (base_1
, iv
->base
, 0)
1766 || operand_equal_p (base_2
, biv
->base
, 0))
1768 iv
->have_address_use
= true;
1769 data
->bivs_not_used_in_addr
--;
1774 /* Cumulates the steps of indices into DATA and replaces their values with the
1775 initial ones. Returns false when the value of the index cannot be determined.
1776 Callback for for_each_index. */
1778 struct ifs_ivopts_data
1780 struct ivopts_data
*ivopts_data
;
1786 idx_find_step (tree base
, tree
*idx
, void *data
)
1788 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1790 bool use_overflow_semantics
= false;
1791 tree step
, iv_base
, iv_step
, lbound
, off
;
1792 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1794 /* If base is a component ref, require that the offset of the reference
1796 if (TREE_CODE (base
) == COMPONENT_REF
)
1798 off
= component_ref_field_offset (base
);
1799 return expr_invariant_in_loop_p (loop
, off
);
1802 /* If base is array, first check whether we will be able to move the
1803 reference out of the loop (in order to take its address in strength
1804 reduction). In order for this to work we need both lower bound
1805 and step to be loop invariants. */
1806 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1808 /* Moreover, for a range, the size needs to be invariant as well. */
1809 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1810 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1813 step
= array_ref_element_size (base
);
1814 lbound
= array_ref_low_bound (base
);
1816 if (!expr_invariant_in_loop_p (loop
, step
)
1817 || !expr_invariant_in_loop_p (loop
, lbound
))
1821 if (TREE_CODE (*idx
) != SSA_NAME
)
1824 iv
= get_iv (dta
->ivopts_data
, *idx
);
1828 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1829 *&x[0], which is not folded and does not trigger the
1830 ARRAY_REF path below. */
1833 if (integer_zerop (iv
->step
))
1836 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1838 step
= array_ref_element_size (base
);
1840 /* We only handle addresses whose step is an integer constant. */
1841 if (TREE_CODE (step
) != INTEGER_CST
)
1845 /* The step for pointer arithmetics already is 1 byte. */
1846 step
= size_one_node
;
1850 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1851 use_overflow_semantics
= true;
1853 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1854 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1855 use_overflow_semantics
))
1857 /* The index might wrap. */
1861 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1862 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1864 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
1867 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
1869 record_biv_for_address_use (dta
->ivopts_data
, iv
);
1874 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1875 object is passed to it in DATA. */
1878 idx_record_use (tree base
, tree
*idx
,
1881 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1882 find_interesting_uses_op (data
, *idx
);
1883 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1885 find_interesting_uses_op (data
, array_ref_element_size (base
));
1886 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1891 /* If we can prove that TOP = cst * BOT for some constant cst,
1892 store cst to MUL and return true. Otherwise return false.
1893 The returned value is always sign-extended, regardless of the
1894 signedness of TOP and BOT. */
1897 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1900 enum tree_code code
;
1901 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1902 widest_int res
, p0
, p1
;
1907 if (operand_equal_p (top
, bot
, 0))
1913 code
= TREE_CODE (top
);
1917 mby
= TREE_OPERAND (top
, 1);
1918 if (TREE_CODE (mby
) != INTEGER_CST
)
1921 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1924 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1929 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1930 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1933 if (code
== MINUS_EXPR
)
1935 *mul
= wi::sext (p0
+ p1
, precision
);
1939 if (TREE_CODE (bot
) != INTEGER_CST
)
1942 p0
= widest_int::from (top
, SIGNED
);
1943 p1
= widest_int::from (bot
, SIGNED
);
1946 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1954 /* Return true if memory reference REF with step STEP may be unaligned. */
1957 may_be_unaligned_p (tree ref
, tree step
)
1959 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1960 thus they are not misaligned. */
1961 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1964 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1965 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1966 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1968 unsigned HOST_WIDE_INT bitpos
;
1969 unsigned int ref_align
;
1970 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1971 if (ref_align
< align
1972 || (bitpos
% align
) != 0
1973 || (bitpos
% BITS_PER_UNIT
) != 0)
1976 unsigned int trailing_zeros
= tree_ctz (step
);
1977 if (trailing_zeros
< HOST_BITS_PER_INT
1978 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1984 /* Return true if EXPR may be non-addressable. */
1987 may_be_nonaddressable_p (tree expr
)
1989 switch (TREE_CODE (expr
))
1991 case TARGET_MEM_REF
:
1992 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1993 target, thus they are always addressable. */
1997 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1998 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2000 case VIEW_CONVERT_EXPR
:
2001 /* This kind of view-conversions may wrap non-addressable objects
2002 and make them look addressable. After some processing the
2003 non-addressability may be uncovered again, causing ADDR_EXPRs
2004 of inappropriate objects to be built. */
2005 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2006 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2009 /* ... fall through ... */
2012 case ARRAY_RANGE_REF
:
2013 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2026 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
2028 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2029 If there is an existing use which has same stripped iv base and step,
2030 this function records this one as a sub use to that; otherwise records
2031 it as a normal one. */
2033 static struct iv_use
*
2034 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
2035 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
)
2040 unsigned HOST_WIDE_INT addr_offset
;
2042 /* Only support sub use for address type uses, that is, with base
2044 if (!iv
->base_object
)
2045 return record_use (data
, use_p
, iv
, stmt
, use_type
);
2047 addr_base
= strip_offset (iv
->base
, &addr_offset
);
2048 for (i
= 0; i
< n_iv_uses (data
); i
++)
2050 use
= iv_use (data
, i
);
2051 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
2054 /* Check if it has the same stripped base and step. */
2055 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
2056 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
2057 && operand_equal_p (addr_base
, use
->addr_base
, 0))
2061 if (i
== n_iv_uses (data
))
2062 return record_use (data
, use_p
, iv
, stmt
,
2063 use_type
, addr_base
, addr_offset
);
2065 return record_sub_use (data
, use_p
, iv
, stmt
,
2066 use_type
, addr_base
, addr_offset
, i
);
2069 /* Finds addresses in *OP_P inside STMT. */
2072 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2075 tree base
= *op_p
, step
= size_zero_node
;
2077 struct ifs_ivopts_data ifs_ivopts_data
;
2079 /* Do not play with volatile memory references. A bit too conservative,
2080 perhaps, but safe. */
2081 if (gimple_has_volatile_ops (stmt
))
2084 /* Ignore bitfields for now. Not really something terribly complicated
2086 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2089 base
= unshare_expr (base
);
2091 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2093 tree type
= build_pointer_type (TREE_TYPE (base
));
2097 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2099 civ
= get_iv (data
, TMR_BASE (base
));
2103 TMR_BASE (base
) = civ
->base
;
2106 if (TMR_INDEX2 (base
)
2107 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2109 civ
= get_iv (data
, TMR_INDEX2 (base
));
2113 TMR_INDEX2 (base
) = civ
->base
;
2116 if (TMR_INDEX (base
)
2117 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2119 civ
= get_iv (data
, TMR_INDEX (base
));
2123 TMR_INDEX (base
) = civ
->base
;
2128 if (TMR_STEP (base
))
2129 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2131 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2135 if (integer_zerop (step
))
2137 base
= tree_mem_ref_addr (type
, base
);
2141 ifs_ivopts_data
.ivopts_data
= data
;
2142 ifs_ivopts_data
.stmt
= stmt
;
2143 ifs_ivopts_data
.step
= size_zero_node
;
2144 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2145 || integer_zerop (ifs_ivopts_data
.step
))
2147 step
= ifs_ivopts_data
.step
;
2149 /* Check that the base expression is addressable. This needs
2150 to be done after substituting bases of IVs into it. */
2151 if (may_be_nonaddressable_p (base
))
2154 /* Moreover, on strict alignment platforms, check that it is
2155 sufficiently aligned. */
2156 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2159 base
= build_fold_addr_expr (base
);
2161 /* Substituting bases of IVs into the base expression might
2162 have caused folding opportunities. */
2163 if (TREE_CODE (base
) == ADDR_EXPR
)
2165 tree
*ref
= &TREE_OPERAND (base
, 0);
2166 while (handled_component_p (*ref
))
2167 ref
= &TREE_OPERAND (*ref
, 0);
2168 if (TREE_CODE (*ref
) == MEM_REF
)
2170 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2171 TREE_OPERAND (*ref
, 0),
2172 TREE_OPERAND (*ref
, 1));
2179 civ
= alloc_iv (data
, base
, step
);
2180 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2184 for_each_index (op_p
, idx_record_use
, data
);
2187 /* Finds and records invariants used in STMT. */
2190 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2193 use_operand_p use_p
;
2196 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2198 op
= USE_FROM_PTR (use_p
);
2199 record_invariant (data
, op
, false);
2203 /* Finds interesting uses of induction variables in the statement STMT. */
2206 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2209 tree op
, *lhs
, *rhs
;
2211 use_operand_p use_p
;
2212 enum tree_code code
;
2214 find_invariants_stmt (data
, stmt
);
2216 if (gimple_code (stmt
) == GIMPLE_COND
)
2218 find_interesting_uses_cond (data
, stmt
);
2222 if (is_gimple_assign (stmt
))
2224 lhs
= gimple_assign_lhs_ptr (stmt
);
2225 rhs
= gimple_assign_rhs1_ptr (stmt
);
2227 if (TREE_CODE (*lhs
) == SSA_NAME
)
2229 /* If the statement defines an induction variable, the uses are not
2230 interesting by themselves. */
2232 iv
= get_iv (data
, *lhs
);
2234 if (iv
&& !integer_zerop (iv
->step
))
2238 code
= gimple_assign_rhs_code (stmt
);
2239 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2240 && (REFERENCE_CLASS_P (*rhs
)
2241 || is_gimple_val (*rhs
)))
2243 if (REFERENCE_CLASS_P (*rhs
))
2244 find_interesting_uses_address (data
, stmt
, rhs
);
2246 find_interesting_uses_op (data
, *rhs
);
2248 if (REFERENCE_CLASS_P (*lhs
))
2249 find_interesting_uses_address (data
, stmt
, lhs
);
2252 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2254 find_interesting_uses_cond (data
, stmt
);
2258 /* TODO -- we should also handle address uses of type
2260 memory = call (whatever);
2267 if (gimple_code (stmt
) == GIMPLE_PHI
2268 && gimple_bb (stmt
) == data
->current_loop
->header
)
2270 iv
= get_iv (data
, PHI_RESULT (stmt
));
2272 if (iv
&& !integer_zerop (iv
->step
))
2276 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2278 op
= USE_FROM_PTR (use_p
);
2280 if (TREE_CODE (op
) != SSA_NAME
)
2283 iv
= get_iv (data
, op
);
2287 find_interesting_uses_op (data
, op
);
2291 /* Finds interesting uses of induction variables outside of loops
2292 on loop exit edge EXIT. */
2295 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2301 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2304 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2305 if (!virtual_operand_p (def
))
2306 find_interesting_uses_op (data
, def
);
2310 /* Finds uses of the induction variables that are interesting. */
2313 find_interesting_uses (struct ivopts_data
*data
)
2316 gimple_stmt_iterator bsi
;
2317 basic_block
*body
= get_loop_body (data
->current_loop
);
2319 struct version_info
*info
;
2322 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2323 fprintf (dump_file
, "Uses:\n\n");
2325 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2330 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2331 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2332 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2333 find_interesting_uses_outside (data
, e
);
2335 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2336 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2337 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2338 if (!is_gimple_debug (gsi_stmt (bsi
)))
2339 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2342 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2346 fprintf (dump_file
, "\n");
2348 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2350 info
= ver_info (data
, i
);
2353 fprintf (dump_file
, " ");
2354 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2355 fprintf (dump_file
, " is invariant (%d)%s\n",
2356 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2360 fprintf (dump_file
, "\n");
2366 /* Compute maximum offset of [base + offset] addressing mode
2367 for memory reference represented by USE. */
2369 static HOST_WIDE_INT
2370 compute_max_addr_offset (struct iv_use
*use
)
2374 HOST_WIDE_INT i
, off
;
2375 unsigned list_index
, num
;
2377 machine_mode mem_mode
, addr_mode
;
2378 static vec
<HOST_WIDE_INT
> max_offset_list
;
2380 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2381 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2383 num
= max_offset_list
.length ();
2384 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2385 if (list_index
>= num
)
2387 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2388 for (; num
< max_offset_list
.length (); num
++)
2389 max_offset_list
[num
] = -1;
2392 off
= max_offset_list
[list_index
];
2396 addr_mode
= targetm
.addr_space
.address_mode (as
);
2397 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2398 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2400 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2401 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2402 width
= HOST_BITS_PER_WIDE_INT
- 1;
2404 for (i
= width
; i
> 0; i
--)
2406 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2407 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2408 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2411 /* For some strict-alignment targets, the offset must be naturally
2412 aligned. Try an aligned offset if mem_mode is not QImode. */
2413 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2414 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2416 off
-= GET_MODE_SIZE (mem_mode
);
2417 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2418 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2425 max_offset_list
[list_index
] = off
;
2429 /* Check if all small groups should be split. Return true if and
2432 1) At least one groups contain two uses with different offsets.
2433 2) No group contains more than two uses with different offsets.
2435 Return false otherwise. We want to split such groups because:
2437 1) Small groups don't have much benefit and may interfer with
2438 general candidate selection.
2439 2) Size for problem with only small groups is usually small and
2440 general algorithm can handle it well.
2442 TODO -- Above claim may not hold when auto increment is supported. */
2445 split_all_small_groups (struct ivopts_data
*data
)
2447 bool split_p
= false;
2448 unsigned int i
, n
, distinct
;
2449 struct iv_use
*pre
, *use
;
2451 n
= n_iv_uses (data
);
2452 for (i
= 0; i
< n
; i
++)
2454 use
= iv_use (data
, i
);
2459 gcc_assert (use
->type
== USE_ADDRESS
);
2460 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2462 if (pre
->addr_offset
!= use
->addr_offset
)
2475 /* For each group of address type uses, this function further groups
2476 these uses according to the maximum offset supported by target's
2477 [base + offset] addressing mode. */
2480 group_address_uses (struct ivopts_data
*data
)
2482 HOST_WIDE_INT max_offset
= -1;
2483 unsigned int i
, n
, sub_id
;
2484 struct iv_use
*pre
, *use
;
2485 unsigned HOST_WIDE_INT addr_offset_first
;
2487 /* Reset max offset to split all small groups. */
2488 if (split_all_small_groups (data
))
2491 n
= n_iv_uses (data
);
2492 for (i
= 0; i
< n
; i
++)
2494 use
= iv_use (data
, i
);
2498 gcc_assert (use
->type
== USE_ADDRESS
);
2499 if (max_offset
!= 0)
2500 max_offset
= compute_max_addr_offset (use
);
2505 addr_offset_first
= use
->addr_offset
;
2506 /* Only uses with offset that can fit in offset part against
2507 the first use can be grouped together. */
2508 for (pre
= use
, use
= use
->next
;
2509 use
&& (use
->addr_offset
- addr_offset_first
2510 <= (unsigned HOST_WIDE_INT
) max_offset
);
2511 pre
= use
, use
= use
->next
)
2514 use
->sub_id
= ++sub_id
;
2517 /* Break the list and create new group. */
2521 use
->id
= n_iv_uses (data
);
2522 use
->related_cands
= BITMAP_ALLOC (NULL
);
2523 data
->iv_uses
.safe_push (use
);
2528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2529 dump_uses (dump_file
, data
);
2532 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2533 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2534 we are at the top-level of the processed address. */
2537 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2538 HOST_WIDE_INT
*offset
)
2540 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2541 enum tree_code code
;
2542 tree type
, orig_type
= TREE_TYPE (expr
);
2543 HOST_WIDE_INT off0
, off1
, st
;
2544 tree orig_expr
= expr
;
2548 type
= TREE_TYPE (expr
);
2549 code
= TREE_CODE (expr
);
2555 if (!cst_and_fits_in_hwi (expr
)
2556 || integer_zerop (expr
))
2559 *offset
= int_cst_value (expr
);
2560 return build_int_cst (orig_type
, 0);
2562 case POINTER_PLUS_EXPR
:
2565 op0
= TREE_OPERAND (expr
, 0);
2566 op1
= TREE_OPERAND (expr
, 1);
2568 op0
= strip_offset_1 (op0
, false, false, &off0
);
2569 op1
= strip_offset_1 (op1
, false, false, &off1
);
2571 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2572 if (op0
== TREE_OPERAND (expr
, 0)
2573 && op1
== TREE_OPERAND (expr
, 1))
2576 if (integer_zerop (op1
))
2578 else if (integer_zerop (op0
))
2580 if (code
== MINUS_EXPR
)
2581 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2586 expr
= fold_build2 (code
, type
, op0
, op1
);
2588 return fold_convert (orig_type
, expr
);
2591 op1
= TREE_OPERAND (expr
, 1);
2592 if (!cst_and_fits_in_hwi (op1
))
2595 op0
= TREE_OPERAND (expr
, 0);
2596 op0
= strip_offset_1 (op0
, false, false, &off0
);
2597 if (op0
== TREE_OPERAND (expr
, 0))
2600 *offset
= off0
* int_cst_value (op1
);
2601 if (integer_zerop (op0
))
2604 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2606 return fold_convert (orig_type
, expr
);
2609 case ARRAY_RANGE_REF
:
2613 step
= array_ref_element_size (expr
);
2614 if (!cst_and_fits_in_hwi (step
))
2617 st
= int_cst_value (step
);
2618 op1
= TREE_OPERAND (expr
, 1);
2619 op1
= strip_offset_1 (op1
, false, false, &off1
);
2620 *offset
= off1
* st
;
2623 && integer_zerop (op1
))
2625 /* Strip the component reference completely. */
2626 op0
= TREE_OPERAND (expr
, 0);
2627 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2640 tmp
= component_ref_field_offset (expr
);
2641 field
= TREE_OPERAND (expr
, 1);
2643 && cst_and_fits_in_hwi (tmp
)
2644 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2646 HOST_WIDE_INT boffset
, abs_off
;
2648 /* Strip the component reference completely. */
2649 op0
= TREE_OPERAND (expr
, 0);
2650 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2651 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2652 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2656 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2663 op0
= TREE_OPERAND (expr
, 0);
2664 op0
= strip_offset_1 (op0
, true, true, &off0
);
2667 if (op0
== TREE_OPERAND (expr
, 0))
2670 expr
= build_fold_addr_expr (op0
);
2671 return fold_convert (orig_type
, expr
);
2674 /* ??? Offset operand? */
2675 inside_addr
= false;
2682 /* Default handling of expressions for that we want to recurse into
2683 the first operand. */
2684 op0
= TREE_OPERAND (expr
, 0);
2685 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2688 if (op0
== TREE_OPERAND (expr
, 0)
2689 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2692 expr
= copy_node (expr
);
2693 TREE_OPERAND (expr
, 0) = op0
;
2695 TREE_OPERAND (expr
, 1) = op1
;
2697 /* Inside address, we might strip the top level component references,
2698 thus changing type of the expression. Handling of ADDR_EXPR
2700 expr
= fold_convert (orig_type
, expr
);
2705 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2708 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2711 tree core
= strip_offset_1 (expr
, false, false, &off
);
2716 /* Returns variant of TYPE that can be used as base for different uses.
2717 We return unsigned type with the same precision, which avoids problems
2721 generic_type_for (tree type
)
2723 if (POINTER_TYPE_P (type
))
2724 return unsigned_type_for (type
);
2726 if (TYPE_UNSIGNED (type
))
2729 return unsigned_type_for (type
);
2732 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2733 the bitmap to that we should store it. */
2735 static struct ivopts_data
*fd_ivopts_data
;
2737 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2739 bitmap
*depends_on
= (bitmap
*) data
;
2740 struct version_info
*info
;
2742 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2744 info
= name_info (fd_ivopts_data
, *expr_p
);
2746 if (!info
->inv_id
|| info
->has_nonlin_use
)
2750 *depends_on
= BITMAP_ALLOC (NULL
);
2751 bitmap_set_bit (*depends_on
, info
->inv_id
);
2756 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2757 position to POS. If USE is not NULL, the candidate is set as related to
2758 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2759 replacement of the final value of the iv by a direct computation. */
2761 static struct iv_cand *
2762 add_candidate_1 (struct ivopts_data *data,
2763 tree base, tree step, bool important, enum iv_position pos,
2764 struct iv_use *use, gimple *incremented_at,
2765 struct iv *orig_iv = NULL)
2768 struct iv_cand *cand = NULL;
2769 tree type, orig_type;
2771 /* For non-original variables, make sure their values are computed in a type
2772 that does not invoke undefined behavior on overflows (since in general,
2773 we cannot prove that these induction variables are non-wrapping). */
2774 if (pos != IP_ORIGINAL)
2776 orig_type = TREE_TYPE (base);
2777 type = generic_type_for (orig_type);
2778 if (type != orig_type)
2780 base = fold_convert (type, base);
2781 step = fold_convert (type, step);
/* Look for an existing candidate with the same position, increment
   statement, base, step and precision; reuse it rather than duplicate.  */
2785 for (i = 0; i < n_iv_cands (data); i++)
2787 cand = iv_cand (data, i);
2789 if (cand->pos != pos)
2792 if (cand->incremented_at != incremented_at
2793 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2794 && cand->ainc_use != use))
2808 if (operand_equal_p (base, cand->iv->base, 0)
2809 && operand_equal_p (step, cand->iv->step, 0)
2810 && (TYPE_PRECISION (TREE_TYPE (base))
2811 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
/* No match found: allocate and register a fresh candidate.  */
2815 if (i == n_iv_cands (data))
2817 cand = XCNEW (struct iv_cand);
2823 cand->iv = alloc_iv (data, base, step);
2826 if (pos != IP_ORIGINAL && cand->iv)
2828 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2829 cand->var_after = cand->var_before;
2831 cand->important = important;
2832 cand->incremented_at = incremented_at;
2833 data->iv_candidates.safe_push (cand);
/* NOTE(review): the condition opening this test is truncated in this chunk;
   for a non-constant STEP the invariants it depends on are recorded.  */
2836 && TREE_CODE (step) != INTEGER_CST)
2838 fd_ivopts_data = data;
2839 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2842 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2843 cand->ainc_use = use;
2845 cand->ainc_use = NULL;
2847 cand->orig_iv = orig_iv;
2848 if (dump_file && (dump_flags & TDF_DETAILS))
2849 dump_cand (dump_file, cand);
/* An already-existing candidate may still be promoted to important.  */
2852 if (important && !cand->important)
2854 cand->important = true;
2855 if (dump_file && (dump_flags & TDF_DETAILS))
2856 fprintf (dump_file, "Candidate %d is important\n", cand->id);
/* Record the use/candidate relation for the cost model.  */
2861 bitmap_set_bit (use->related_cands, i);
2862 if (dump_file && (dump_flags & TDF_DETAILS))
2863 fprintf (dump_file, "Candidate %d is related to use %d\n",
2870 /* Returns true if incrementing the induction variable at the end of the LOOP
2873 The purpose is to avoid splitting latch edge with a biv increment, thus
2874 creating a jump, possibly confusing other optimization passes and leaving
2875 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2876 is not available (so we do not have a better alternative), or if the latch
2877 edge is already nonempty. */
2880 allow_ip_end_pos_p (struct loop *loop)
/* No normal position available: IP_END is the only option.  */
2882 if (!ip_normal_pos (loop))
/* Latch block already contains code, so no new jump is introduced.  */
2885 if (!empty_block_p (ip_end_pos (loop)))
2891 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2892 Important field is set to IMPORTANT. */
2895 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2896 bool important, struct iv_use *use)
2898 basic_block use_bb = gimple_bb (use->stmt);
2899 machine_mode mem_mode;
2900 unsigned HOST_WIDE_INT cstepi;
2902 /* If we insert the increment in any position other than the standard
2903 ones, we must ensure that it is incremented once per iteration.
2904 It must not be in an inner nested loop, or one side of an if
2906 if (use_bb->loop_father != data->current_loop
2907 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2908 || stmt_could_throw_p (use->stmt)
2909 || !cst_and_fits_in_hwi (step))
2912 cstepi = int_cst_value (step);
2914 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
/* Pre-inc/dec: the step must equal +/- the access size.  */
2915 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2916 || USE_STORE_PRE_INCREMENT (mem_mode))
2917 && GET_MODE_SIZE (mem_mode) == cstepi)
2918 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2919 || USE_STORE_PRE_DECREMENT (mem_mode))
2920 && GET_MODE_SIZE (mem_mode) == -cstepi))
2922 enum tree_code code = MINUS_EXPR;
2924 tree new_step = step;
/* Pointers have no MINUS_EXPR; negate the step and use POINTER_PLUS.  */
2926 if (POINTER_TYPE_P (TREE_TYPE (base)))
2928 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2929 code = POINTER_PLUS_EXPR;
2932 new_step = fold_convert (TREE_TYPE (base), new_step);
/* For a pre-modify, the candidate starts one step before BASE.  */
2933 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2934 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
/* Post-inc/dec: candidate keeps BASE and is bumped after the use.  */
2937 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2938 || USE_STORE_POST_INCREMENT (mem_mode))
2939 && GET_MODE_SIZE (mem_mode) == cstepi)
2940 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2941 || USE_STORE_POST_DECREMENT (mem_mode))
2942 && GET_MODE_SIZE (mem_mode) == -cstepi))
2944 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2949 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2950 position to POS. If USE is not NULL, the candidate is set as related to
2951 it. The candidate computation is scheduled before exit condition and at
2955 add_candidate (struct ivopts_data *data,
2956 tree base, tree step, bool important, struct iv_use *use,
2957 struct iv *orig_iv = NULL)
/* Grouped sub-uses never get their own candidates.  */
2959 gcc_assert (use == NULL || use->sub_id == 0);
/* Prefer the normal position (before the exit test) when available.  */
2961 if (ip_normal_pos (data->current_loop))
2962 add_candidate_1 (data, base, step, important,
2963 IP_NORMAL, use, NULL, orig_iv);
/* IP_END only when allowed -- see allow_ip_end_pos_p.  */
2964 if (ip_end_pos (data->current_loop)
2965 && allow_ip_end_pos_p (data->current_loop))
2966 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2969 /* Adds standard iv candidates. */
2972 add_standard_iv_candidates (struct ivopts_data *data)
/* The canonical counter 0, 1, 2, ... in type int.  */
2974 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2976 /* The same for a double-integer type if it is still fast enough. */
2978 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2979 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2980 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2981 build_int_cst (long_integer_type_node, 1), true, NULL);
2983 /* The same for a double-integer type if it is still fast enough. */
2985 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2986 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2987 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2988 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2992 /* Adds candidates bases on the old induction variable IV. */
2995 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2999 struct iv_cand *cand;
3001 /* Check if this biv is used in address type use. */
3002 if (iv->no_overflow && iv->have_address_use
3003 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3004 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3006 tree base = fold_convert (sizetype, iv->base);
3007 tree step = fold_convert (sizetype, iv->step);
3009 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3010 add_candidate (data, base, step, true, NULL, iv);
3011 /* Add iv cand of the original type only if it has nonlinear use. */
3012 if (iv->have_use_for)
3013 add_candidate (data, iv->base, iv->step, true, NULL);
3016 add_candidate (data, iv->base, iv->step, true, NULL);
3018 /* The same, but with initial value zero. */
3019 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3020 add_candidate (data, size_int (0), iv->step, true, NULL);
3022 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3023 iv->step, true, NULL);
3025 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3026 if (gimple_code (phi) == GIMPLE_PHI)
3028 /* Additionally record the possibility of leaving the original iv
3030 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3031 /* Don't add candidate if it's from another PHI node because
3032 it's an affine iv appearing in the form of PEELED_CHREC. */
3033 phi = SSA_NAME_DEF_STMT (def);
3034 if (gimple_code (phi) != GIMPLE_PHI)
3036 cand = add_candidate_1 (data,
3037 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3038 SSA_NAME_DEF_STMT (def));
/* Keep the original SSA names so the biv can be left untouched.  */
3039 cand->var_before = iv->ssa_name;
3040 cand->var_after = def;
3043 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3047 /* Adds candidates based on the old induction variables. */
3050 add_iv_candidate_for_bivs (struct ivopts_data *data)
/* Scan every relevant SSA version for a nonzero-step biv.  */
3056 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3058 iv = ver_info (data, i)->iv;
3059 if (iv && iv->biv_p && !integer_zerop (iv->step))
3060 add_iv_candidate_for_biv (data, iv);
3064 /* Adds candidates based on the value of USE's iv. */
3067 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3069 unsigned HOST_WIDE_INT offset;
3072 struct iv *iv = use->iv;
/* First, the use's own iv verbatim.  */
3074 add_candidate (data, iv->base, iv->step, false, use);
3076 /* The same, but with initial value zero. Make such variable important,
3077 since it is generic enough so that possibly many uses may be based
3079 basetype = TREE_TYPE (iv->base);
3080 if (POINTER_TYPE_P (basetype))
3081 basetype = sizetype;
3082 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3084 /* Third, try removing the constant offset. Make sure to even
3085 add a candidate for &a[0] vs. (T *)&a. */
3086 base = strip_offset (iv->base, &offset);
3087 if (offset || base != iv->base)
3088 add_candidate (data, base, iv->step, false, use);
3090 /* At last, add auto-incremental candidates. Make such variables
3091 important since other iv uses with same base object may be based
3093 if (use != NULL && use->type == USE_ADDRESS)
3094 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3097 /* Adds candidates based on the uses. */
3100 add_iv_candidate_for_uses (struct ivopts_data *data)
3104 for (i = 0; i < n_iv_uses (data); i++)
3106 struct iv_use *use = iv_use (data, i);
/* NOTE(review): the enclosing switch on the use kind is truncated here.  */
3113 case USE_NONLINEAR_EXPR:
3116 /* Just add the ivs based on the value of the iv used here. */
3117 add_iv_candidate_for_use (data, use);
3126 /* Record important candidates and add them to related_cands bitmaps
3130 record_important_candidates (struct ivopts_data *data)
/* Collect the ids of all important candidates into one bitmap.  */
3135 for (i = 0; i < n_iv_cands (data); i++)
3137 struct iv_cand *cand = iv_cand (data, i);
3139 if (cand->important)
3140 bitmap_set_bit (data->important_candidates, i);
/* Below the bound we can afford to evaluate every (use, cand) pair.  */
3143 data->consider_all_candidates = (n_iv_cands (data)
3144 <= CONSIDER_ALL_CANDIDATES_BOUND);
3146 if (data->consider_all_candidates)
3148 /* We will not need "related_cands" bitmaps in this case,
3149 so release them to decrease peak memory consumption. */
3150 for (i = 0; i < n_iv_uses (data); i++)
3152 use = iv_use (data, i);
3153 BITMAP_FREE (use->related_cands);
3158 /* Add important candidates to the related_cands bitmaps. */
3159 for (i = 0; i < n_iv_uses (data); i++)
3160 bitmap_ior_into (iv_use (data, i)->related_cands,
3161 data->important_candidates);
3165 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3166 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3167 we allocate a simple list to every use. */
3170 alloc_use_cost_map (struct ivopts_data *data)
3172 unsigned i, size, s;
3174 for (i = 0; i < n_iv_uses (data); i++)
3176 struct iv_use *use = iv_use (data, i);
/* Dense array indexed directly by candidate id ...  */
3178 if (data->consider_all_candidates)
3179 size = n_iv_cands (data);
/* ... or a small open-addressed hash sized to the related candidates.  */
3182 s = bitmap_count_bits (use->related_cands);
3184 /* Round up to the power of two, so that moduling by it is fast. */
3185 size = s ? (1 << ceil_log2 (s)) : 1;
3188 use->n_map_members = size;
3189 use->cost_map = XCNEWVEC (struct cost_pair, size);
3193 /* Returns description of computation cost of expression whose runtime
3194 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3197 new_cost (unsigned runtime, unsigned complexity)
/* Build the comp_cost value field by field; declaration and return of
   the local are on lines missing from this chunk.  */
3201 cost.cost = runtime;
3202 cost.complexity = complexity;
3207 /* Returns true if COST is infinite. */
3210 infinite_cost_p (comp_cost cost)
/* INFTY is the sentinel for "candidate unusable for this use".  */
3212 return cost.cost == INFTY;
3215 /* Adds costs COST1 and COST2. */
3218 add_costs (comp_cost cost1, comp_cost cost2)
/* Infinity is absorbing: anything plus infinite is infinite.  */
3220 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3221 return infinite_cost;
/* COST1 is a by-value copy, so it can be mutated and returned.  */
3223 cost1.cost += cost2.cost;
3224 cost1.complexity += cost2.complexity;
3228 /* Subtracts costs COST1 and COST2. */
3231 sub_costs (comp_cost cost1, comp_cost cost2)
/* Unlike add_costs there is no infinity handling here -- callers
   presumably never subtract from an infinite cost.  */
3233 cost1.cost -= cost2.cost;
3234 cost1.complexity -= cost2.complexity;
3239 /* Returns a negative number if COST1 < COST2, a positive number if
3240 COST1 > COST2, and 0 if COST1 = COST2. */
3243 compare_costs (comp_cost cost1, comp_cost cost2)
/* Complexity is only the tie-breaker when runtime costs are equal.  */
3245 if (cost1.cost == cost2.cost)
3246 return cost1.complexity - cost2.complexity;
3248 return cost1.cost - cost2.cost;
3251 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3252 on invariants DEPENDS_ON and that the value used in expressing it
3253 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3256 set_use_iv_cost (struct ivopts_data *data,
3257 struct iv_use *use, struct iv_cand *cand,
3258 comp_cost cost, bitmap depends_on, tree value,
3259 enum tree_code comp, int inv_expr_id)
/* An infinite cost is never stored; just release the dependency bitmap.  */
3263 if (infinite_cost_p (cost))
3265 BITMAP_FREE (depends_on);
/* Dense case: slot is indexed directly by candidate id.  */
3269 if (data->consider_all_candidates)
3271 use->cost_map[cand->id].cand = cand;
3272 use->cost_map[cand->id].cost = cost;
3273 use->cost_map[cand->id].depends_on = depends_on;
3274 use->cost_map[cand->id].value = value;
3275 use->cost_map[cand->id].comp = comp;
3276 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
/* Sparse case: open addressing with linear probing, wrapping at the end.  */
3280 /* n_map_members is a power of two, so this computes modulo. */
3281 s = cand->id & (use->n_map_members - 1);
3282 for (i = s; i < use->n_map_members; i++)
3283 if (!use->cost_map[i].cand)
3285 for (i = 0; i < s; i++)
3286 if (!use->cost_map[i].cand)
/* I is the first free slot found by the probe above.  */
3292 use->cost_map[i].cand = cand;
3293 use->cost_map[i].cost = cost;
3294 use->cost_map[i].depends_on = depends_on;
3295 use->cost_map[i].value = value;
3296 use->cost_map[i].comp = comp;
3297 use->cost_map[i].inv_expr_id = inv_expr_id;
3300 /* Gets cost of (USE, CANDIDATE) pair. */
3302 static struct cost_pair *
3303 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3304 struct iv_cand *cand)
3307 struct cost_pair *ret;
/* Dense case mirrors set_use_iv_cost: direct index by candidate id.  */
3312 if (data->consider_all_candidates)
3314 ret = use->cost_map + cand->id;
/* Sparse case: probe the open-addressed table the same way it was filled;
   an empty slot means the pair was never recorded.  */
3321 /* n_map_members is a power of two, so this computes modulo. */
3322 s = cand->id & (use->n_map_members - 1);
3323 for (i = s; i < use->n_map_members; i++)
3324 if (use->cost_map[i].cand == cand)
3325 return use->cost_map + i;
3326 else if (use->cost_map[i].cand == NULL)
3328 for (i = 0; i < s; i++)
3329 if (use->cost_map[i].cand == cand)
3330 return use->cost_map + i;
3331 else if (use->cost_map[i].cand == NULL)
3337 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3339 produce_memory_decl_rtl (tree obj, int *regno)
3341 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3342 machine_mode address_mode = targetm.addr_space.address_mode (as);
/* Static/external objects get a symbolic address ...  */
3346 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3348 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3349 x = gen_rtx_SYMBOL_REF (address_mode, name);
3350 SET_SYMBOL_REF_DECL (x, obj);
3351 x = gen_rtx_MEM (DECL_MODE (obj), x);
3352 set_mem_addr_space (x, as);
3353 targetm.encode_section_info (obj, x, true);
/* ... everything else is addressed through a fresh fake register.  */
3357 x = gen_raw_REG (address_mode, (*regno)++);
3358 x = gen_rtx_MEM (DECL_MODE (obj), x);
3359 set_mem_addr_space (x, as);
3365 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3366 walk_tree. DATA contains the actual fake register number. */
3369 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3371 tree obj = NULL_TREE;
3373 int *regno = (int *) data;
/* NOTE(review): several case labels of this switch are missing from the
   garbled chunk; the fragments below are kept in their original order.  */
3375 switch (TREE_CODE (*expr_p))
/* Strip handled components to reach the base object of a reference.  */
3378 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3379 handled_component_p (*expr_p);
3380 expr_p = &TREE_OPERAND (*expr_p, 0))
3383 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3384 x = produce_memory_decl_rtl (obj, regno);
3389 obj = SSA_NAME_VAR (*expr_p);
3390 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3393 if (!DECL_RTL_SET_P (obj))
3394 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3403 if (DECL_RTL_SET_P (obj))
/* BLKmode objects must live in memory; others get a fake register.  */
3406 if (DECL_MODE (obj) == BLKmode)
3407 x = produce_memory_decl_rtl (obj, regno);
3409 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so computation_cost can reset its fake DECL_RTL.  */
3419 decl_rtl_to_reset.safe_push (obj);
3420 SET_DECL_RTL (obj, x);
3426 /* Determines cost of the computation of EXPR. */
3429 computation_cost (tree expr
, bool speed
)
3433 tree type
= TREE_TYPE (expr
);
3435 /* Avoid using hard regs in ways which may be unsupported. */
3436 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3437 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3438 enum node_frequency real_frequency
= node
->frequency
;
3440 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3441 crtl
->maybe_hot_insn_p
= speed
;
3442 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3444 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3447 default_rtl_profile ();
3448 node
->frequency
= real_frequency
;
3450 cost
= seq_cost (seq
, speed
);
3452 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3453 TYPE_ADDR_SPACE (type
), speed
);
3454 else if (!REG_P (rslt
))
3455 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3460 /* Returns variable containing the value of candidate CAND at statement AT. */
3463 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
/* After the candidate's increment the updated name is live ...  */
3465 if (stmt_after_increment (loop, cand, stmt))
3466 return cand->var_after;
/* ... before it, the pre-increment name.  */
3468 return cand->var_before;
3471 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3472 same precision that is at least as wide as the precision of TYPE, stores
3473 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3477 determine_common_wider_type (tree *a, tree *b)
3479 tree wider_type = NULL;
3481 tree atype = TREE_TYPE (*a);
/* Peel a conversion off *A and remember the inner (wider) type.  */
3483 if (CONVERT_EXPR_P (*a))
3485 suba = TREE_OPERAND (*a, 0);
3486 wider_type = TREE_TYPE (suba);
3487 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
/* *B must be a conversion from a type of the same precision.  */
3493 if (CONVERT_EXPR_P (*b))
3495 subb = TREE_OPERAND (*b, 0);
3496 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3507 /* Determines the expression by that USE is expressed from induction variable
3508 CAND at statement AT in LOOP. The expression is stored in a decomposed
3509 form into AFF. Returns false if USE cannot be expressed using CAND. */
3512 get_computation_aff (struct loop *loop,
3513 struct iv_use *use, struct iv_cand *cand, gimple *at,
3514 struct aff_tree *aff)
3516 tree ubase = use->iv->base;
3517 tree ustep = use->iv->step;
3518 tree cbase = cand->iv->base;
3519 tree cstep = cand->iv->step, cstep_common;
3520 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3521 tree common_type, var;
3523 aff_tree cbase_aff, var_aff;
/* Candidate narrower than the use cannot represent its values.  */
3526 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3528 /* We do not have a precision to express the values of use. */
/* All arithmetic below is done in the unsigned variant of UTYPE so that
   overflow is well defined.  */
3532 var = var_at_stmt (loop, cand, at);
3533 uutype = unsigned_type_for (utype);
3535 /* If the conversion is not noop, perform it. */
3536 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3538 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3539 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3541 tree inner_base, inner_step, inner_type;
3542 inner_base = TREE_OPERAND (cbase, 0);
3543 if (CONVERT_EXPR_P (cstep))
3544 inner_step = TREE_OPERAND (cstep, 0);
3548 inner_type = TREE_TYPE (inner_base);
3549 /* If candidate is added from a biv whose type is smaller than
3550 ctype, we know both candidate and the biv won't overflow.
3551 In this case, it's safe to skip the convertion in candidate.
3552 As an example, (unsigned short)((unsigned long)A) equals to
3553 (unsigned short)A, if A has a type no larger than short. */
3554 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3560 cstep = fold_convert (uutype, cstep);
3561 cbase = fold_convert (uutype, cbase);
3562 var = fold_convert (uutype, var);
/* USE's step must be an integer multiple RAT of CAND's step.  */
3565 if (!constant_multiple_of (ustep, cstep, &rat))
3568 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3569 type, we achieve better folding by computing their difference in this
3570 wider type, and cast the result to UUTYPE. We do not need to worry about
3571 overflows, as all the arithmetics will in the end be performed in UUTYPE
3573 common_type = determine_common_wider_type (&ubase, &cbase);
3575 /* use = ubase - ratio * cbase + ratio * var. */
3576 tree_to_aff_combination (ubase, common_type, aff);
3577 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3578 tree_to_aff_combination (var, uutype, &var_aff);
3580 /* We need to shift the value if we are after the increment. */
3581 if (stmt_after_increment (loop, cand, at))
3585 if (common_type != uutype)
3586 cstep_common = fold_convert (common_type, cstep);
3588 cstep_common = cstep;
3590 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3591 aff_combination_add (&cbase_aff, &cstep_aff);
/* Assemble ubase - rat*cbase + rat*var in AFF.  */
3594 aff_combination_scale (&cbase_aff, -rat);
3595 aff_combination_add (aff, &cbase_aff);
3596 if (common_type != uutype)
3597 aff_combination_convert (aff, uutype);
3599 aff_combination_scale (&var_aff, rat);
3600 aff_combination_add (aff, &var_aff);
3605 /* Return the type of USE. */
3608 get_use_type (struct iv_use *use)
3610 tree base_type = TREE_TYPE (use->iv->base);
3613 if (use->type == USE_ADDRESS)
3615 /* The base_type may be a void pointer. Create a pointer type based on
3616 the mem_ref instead. */
3617 type = build_pointer_type (TREE_TYPE (*use->op_p));
/* The rebuilt pointer must still point into the same address space.  */
3618 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3619 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3627 /* Determines the expression by that USE is expressed from induction variable
3628 CAND at statement AT in LOOP. The computation is unshared. */
3631 get_computation_at (struct loop *loop,
3632 struct iv_use *use, struct iv_cand *cand, gimple *at)
3635 tree type = get_use_type (use);
/* Failure of the affine decomposition means USE is inexpressible here.  */
3637 if (!get_computation_aff (loop, use, cand, at, &aff))
3639 unshare_aff_combination (&aff);
3640 return fold_convert (type, aff_combination_to_tree (&aff));
3643 /* Determines the expression by that USE is expressed from induction variable
3644 CAND in LOOP. The computation is unshared. */
3647 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
/* Convenience wrapper: evaluate at the use's own statement.  */
3649 return get_computation_at (loop, use, cand, use->stmt);
3652 /* Adjust the cost COST for being in loop setup rather than loop body.
3653 If we're optimizing for space, the loop setup overhead is constant;
3654 if we're optimizing for speed, amortize it over the per-iteration cost. */
3656 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
/* Spread one-time setup cost across the estimated iteration count.  */
3660 else if (optimize_loop_for_speed_p (data->current_loop))
3661 return cost / avg_loop_niter (data->current_loop);
3666 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3667 validity for a memory reference accessing memory of mode MODE in
3668 address space AS. */
3672 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3675 #define MAX_RATIO 128
/* Results are cached per (address space, mode) pair.  */
3676 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3677 static vec<sbitmap> valid_mult_list;
3680 if (data_index >= valid_mult_list.length ())
3681 valid_mult_list.safe_grow_cleared (data_index + 1);
3683 valid_mult = valid_mult_list[data_index];
/* First query for this mode: probe each ratio in [-MAX_RATIO, MAX_RATIO]
   against the target's legitimate-address predicate.  */
3686 machine_mode address_mode = targetm.addr_space.address_mode (as);
3687 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3688 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3692 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3693 bitmap_clear (valid_mult);
3694 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3695 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3696 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3698 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3699 if (memory_address_addr_space_p (mode, addr, as)
3700 || memory_address_addr_space_p (mode, scaled, as))
3701 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3704 if (dump_file && (dump_flags & TDF_DETAILS))
3706 fprintf (dump_file, " allowed multipliers:");
3707 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3708 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3709 fprintf (dump_file, " %d", (int) i);
3710 fprintf (dump_file, "\n");
3711 fprintf (dump_file, "\n");
3714 valid_mult_list[data_index] = valid_mult;
/* Ratios beyond the probed range are conservatively rejected.  */
3717 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3720 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3723 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3724 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3725 variable is omitted. Compute the cost for a memory reference that accesses
3726 a memory location of mode MEM_MODE in address space AS.
3728 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3729 size of MEM_MODE / RATIO) is available. To make this determination, we
3730 look at the size of the increment to be made, which is given in CSTEP.
3731 CSTEP may be zero if the step is unknown.
3732 STMT_AFTER_INC is true iff the statement we're looking at is after the
3733 increment of the original biv.
3735 TODO -- there must be some better way. This all is quite crude. */
/* Auto-increment addressing flavors; AINC_NONE doubles as the count.  */
3739 AINC_PRE_INC, /* Pre increment. */
3740 AINC_PRE_DEC, /* Pre decrement. */
3741 AINC_POST_INC, /* Post increment. */
3742 AINC_POST_DEC, /* Post decrement. */
3743 AINC_NONE /* Also the number of auto increment types. */
/* Per-(address space, mode) cache of address costs, indexed by the four
   presence bits [sym][var][offset][ratio].  */
3746 struct address_cost_data
3748 HOST_WIDE_INT min_offset, max_offset;
3749 unsigned costs[2][2][2][2];
3750 unsigned ainc_costs[AINC_NONE];
3755 get_address_cost (bool symbol_present, bool var_present,
3756 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3757 HOST_WIDE_INT cstep, machine_mode mem_mode,
3758 addr_space_t as, bool speed,
3759 bool stmt_after_inc, bool *may_autoinc)
3761 machine_mode address_mode = targetm.addr_space.address_mode (as);
3762 static vec<address_cost_data *> address_cost_data_list;
3763 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3764 address_cost_data *data;
3765 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3766 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3767 unsigned cost, acost, complexity;
3768 enum ainc_type autoinc_type;
3769 bool offset_p, ratio_p, autoinc;
3770 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3771 unsigned HOST_WIDE_INT mask;
3774 if (data_index >= address_cost_data_list.length ())
3775 address_cost_data_list.safe_grow_cleared (data_index + 1);
3777 data = address_cost_data_list[data_index];
/* Cache miss: probe the target once for this (as, mem_mode) pair.  */
3781 HOST_WIDE_INT rat, off = 0;
3782 int old_cse_not_expected, width;
3783 unsigned sym_p, var_p, off_p, rat_p, add_c;
3788 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3790 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3792 width = GET_MODE_BITSIZE (address_mode) - 1;
3793 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3794 width = HOST_BITS_PER_WIDE_INT - 1;
3795 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
/* Binary-probe the most negative representable constant offset.  */
3797 for (i = width; i >= 0; i--)
3799 off = -((unsigned HOST_WIDE_INT) 1 << i);
3800 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3801 if (memory_address_addr_space_p (mem_mode, addr, as))
3804 data->min_offset = (i == -1? 0 : off);
/* And the largest positive constant offset.  */
3806 for (i = width; i >= 0; i--)
3808 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3809 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3810 if (memory_address_addr_space_p (mem_mode, addr, as))
3812 /* For some strict-alignment targets, the offset must be naturally
3813 aligned. Try an aligned offset if mem_mode is not QImode. */
3814 off = mem_mode != QImode
3815 ? ((unsigned HOST_WIDE_INT) 1 << i)
3816 - GET_MODE_SIZE (mem_mode)
3820 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3821 if (memory_address_addr_space_p (mem_mode, addr, as))
3827 data->max_offset = off;
3829 if (dump_file && (dump_flags & TDF_DETAILS))
3831 fprintf (dump_file, "get_address_cost:\n");
3832 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3833 GET_MODE_NAME (mem_mode),
3835 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3836 GET_MODE_NAME (mem_mode),
3841 for (i = 2; i <= MAX_RATIO; i++)
3842 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3848 /* Compute the cost of various addressing modes. */
3850 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3851 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3853 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3854 || USE_STORE_PRE_DECREMENT (mem_mode))
3856 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3857 has_predec[mem_mode]
3858 = memory_address_addr_space_p (mem_mode, addr, as);
3860 if (has_predec[mem_mode])
3861 data->ainc_costs[AINC_PRE_DEC]
3862 = address_cost (addr, mem_mode, as, speed);
3864 if (USE_LOAD_POST_DECREMENT (mem_mode)
3865 || USE_STORE_POST_DECREMENT (mem_mode))
3867 addr = gen_rtx_POST_DEC (address_mode, reg0);
3868 has_postdec[mem_mode]
3869 = memory_address_addr_space_p (mem_mode, addr, as);
3871 if (has_postdec[mem_mode])
3872 data->ainc_costs[AINC_POST_DEC]
3873 = address_cost (addr, mem_mode, as, speed);
/* NOTE(review): the second operand below tests USE_STORE_PRE_DECREMENT in
   the PRE_INCREMENT guard -- presumably a copy-paste slip for
   USE_STORE_PRE_INCREMENT (all three sibling guards pair LOAD/STORE of the
   same flavor).  TODO confirm against upstream before changing.  */
3875 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3876 || USE_STORE_PRE_DECREMENT (mem_mode))
3878 addr = gen_rtx_PRE_INC (address_mode, reg0);
3879 has_preinc[mem_mode]
3880 = memory_address_addr_space_p (mem_mode, addr, as);
3882 if (has_preinc[mem_mode])
3883 data->ainc_costs[AINC_PRE_INC]
3884 = address_cost (addr, mem_mode, as, speed);
3886 if (USE_LOAD_POST_INCREMENT (mem_mode)
3887 || USE_STORE_POST_INCREMENT (mem_mode))
3889 addr = gen_rtx_POST_INC (address_mode, reg0);
3890 has_postinc[mem_mode]
3891 = memory_address_addr_space_p (mem_mode, addr, as);
3893 if (has_postinc[mem_mode])
3894 data->ainc_costs[AINC_POST_INC]
3895 = address_cost (addr, mem_mode, as, speed);
/* Enumerate all 16 combinations of sym/var/off/ratio presence bits and
   record the target cost of each shape.  */
3897 for (i = 0; i < 16; i++)
3900 var_p = (i >> 1) & 1;
3901 off_p = (i >> 2) & 1;
3902 rat_p = (i >> 3) & 1;
3906 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3907 gen_int_mode (rat, address_mode));
3910 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3914 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3915 /* ??? We can run into trouble with some backends by presenting
3916 it with symbols which haven't been properly passed through
3917 targetm.encode_section_info. By setting the local bit, we
3918 enhance the probability of things working. */
3919 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3922 base = gen_rtx_fmt_e (CONST, address_mode,
3924 (PLUS, address_mode, base,
3925 gen_int_mode (off, address_mode)));
3928 base = gen_int_mode (off, address_mode);
3933 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3936 /* To avoid splitting addressing modes, pretend that no cse will
3938 old_cse_not_expected = cse_not_expected;
3939 cse_not_expected = true;
3940 addr = memory_address_addr_space (mem_mode, addr, as);
3941 cse_not_expected = old_cse_not_expected;
3945 acost = seq_cost (seq, speed);
3946 acost += address_cost (addr, mem_mode, as, speed);
3950 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3953 /* On some targets, it is quite expensive to load symbol to a register,
3954 which makes addresses that contain symbols look much more expensive.
3955 However, the symbol will have to be loaded in any case before the
3956 loop (and quite likely we have it in register already), so it does not
3957 make much sense to penalize them too heavily. So make some final
3958 tweaks for the SYMBOL_PRESENT modes:
3960 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3961 var is cheaper, use this mode with small penalty.
3962 If VAR_PRESENT is true, try whether the mode with
3963 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3964 if this is the case, use it. */
3965 add_c = add_cost (speed, address_mode);
3966 for (i = 0; i < 8; i++)
3969 off_p = (i >> 1) & 1;
3970 rat_p = (i >> 2) & 1;
3972 acost = data->costs[0][1][off_p][rat_p] + 1;
3976 if (acost < data->costs[1][var_p][off_p][rat_p])
3977 data->costs[1][var_p][off_p][rat_p] = acost;
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3982 fprintf (dump_file, "Address costs:\n");
3984 for (i = 0; i < 16; i++)
3987 var_p = (i >> 1) & 1;
3988 off_p = (i >> 2) & 1;
3989 rat_p = (i >> 3) & 1;
3991 fprintf (dump_file, " ");
3993 fprintf (dump_file, "sym + ");
3995 fprintf (dump_file, "var + ");
3997 fprintf (dump_file, "cst + ");
3999 fprintf (dump_file, "rat * ");
4001 acost = data->costs[sym_p][var_p][off_p][rat_p];
4002 fprintf (dump_file, "index costs %d\n", acost);
4004 if (has_predec[mem_mode] || has_postdec[mem_mode]
4005 || has_preinc[mem_mode] || has_postinc[mem_mode])
4006 fprintf (dump_file, " May include autoinc/dec\n");
4007 fprintf (dump_file, "\n");
4010 address_cost_data_list[data_index] = data;
/* Sign-extend OFFSET from the width of the address mode.  */
4013 bits = GET_MODE_BITSIZE (address_mode);
4014 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4016 if ((offset >> (bits - 1) & 1))
/* Classify which auto-inc/dec form (if any) this access could use.  */
4021 autoinc_type = AINC_NONE;
4022 msize = GET_MODE_SIZE (mem_mode);
4023 autoinc_offset = offset;
4025 autoinc_offset += ratio * cstep;
4026 if (symbol_present || var_present || ratio != 1)
4030 if (has_postinc[mem_mode] && autoinc_offset == 0
4032 autoinc_type = AINC_POST_INC;
4033 else if (has_postdec[mem_mode] && autoinc_offset == 0
4035 autoinc_type = AINC_POST_DEC;
4036 else if (has_preinc[mem_mode] && autoinc_offset == msize
4038 autoinc_type = AINC_PRE_INC;
4039 else if (has_predec[mem_mode] && autoinc_offset == -msize
4041 autoinc_type = AINC_PRE_DEC;
4043 if (autoinc_type != AINC_NONE)
4048 offset_p = (s_offset != 0
4049 && data->min_offset <= s_offset
4050 && s_offset <= data->max_offset);
4051 ratio_p = (ratio != 1
4052 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
/* A disallowed multiplier must be materialized by a multiply ...  */
4054 if (ratio != 1 && !ratio_p)
4055 cost += mult_by_coeff_cost (ratio, address_mode, speed);
/* ... and an unencodable offset by an addition.  */
4057 if (s_offset && !offset_p && !symbol_present)
4058 cost += add_cost (speed, address_mode);
4061 *may_autoinc = autoinc;
4063 acost = data->ainc_costs[autoinc_type];
4065 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4066 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4067 return new_cost (cost + acost, complexity);
4070 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4071 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4072 calculating the operands of EXPR. Returns true if successful, and returns
4073 the cost in COST. */
4076 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
4077 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4080 tree op1
= TREE_OPERAND (expr
, 1);
4081 tree cst
= TREE_OPERAND (mult
, 1);
4082 tree multop
= TREE_OPERAND (mult
, 0);
4083 int m
= exact_log2 (int_cst_value (cst
));
4084 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4085 int as_cost
, sa_cost
;
4088 if (!(m
>= 0 && m
< maxm
))
4092 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4094 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4096 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4097 use that in preference to a shift insn followed by an add insn. */
4098 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4099 ? shiftadd_cost (speed
, mode
, m
)
4101 ? shiftsub1_cost (speed
, mode
, m
)
4102 : shiftsub0_cost (speed
, mode
, m
)));
4104 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
4105 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
4107 STRIP_NOPS (multop
);
4108 if (!is_gimple_val (multop
))
4109 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
4115 /* Estimates cost of forcing expression EXPR into a variable. */
4118 force_expr_to_var_cost (tree expr
, bool speed
)
4120 static bool costs_initialized
= false;
4121 static unsigned integer_cost
[2];
4122 static unsigned symbol_cost
[2];
4123 static unsigned address_cost
[2];
4125 comp_cost cost0
, cost1
, cost
;
4128 if (!costs_initialized
)
4130 tree type
= build_pointer_type (integer_type_node
);
4135 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4136 TREE_STATIC (var
) = 1;
4137 x
= produce_memory_decl_rtl (var
, NULL
);
4138 SET_DECL_RTL (var
, x
);
4140 addr
= build1 (ADDR_EXPR
, type
, var
);
4143 for (i
= 0; i
< 2; i
++)
4145 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4148 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4151 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4154 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4155 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4156 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4157 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4158 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4159 fprintf (dump_file
, "\n");
4163 costs_initialized
= true;
4168 if (SSA_VAR_P (expr
))
4171 if (is_gimple_min_invariant (expr
))
4173 if (TREE_CODE (expr
) == INTEGER_CST
)
4174 return new_cost (integer_cost
[speed
], 0);
4176 if (TREE_CODE (expr
) == ADDR_EXPR
)
4178 tree obj
= TREE_OPERAND (expr
, 0);
4180 if (TREE_CODE (obj
) == VAR_DECL
4181 || TREE_CODE (obj
) == PARM_DECL
4182 || TREE_CODE (obj
) == RESULT_DECL
)
4183 return new_cost (symbol_cost
[speed
], 0);
4186 return new_cost (address_cost
[speed
], 0);
4189 switch (TREE_CODE (expr
))
4191 case POINTER_PLUS_EXPR
:
4195 op0
= TREE_OPERAND (expr
, 0);
4196 op1
= TREE_OPERAND (expr
, 1);
4203 op0
= TREE_OPERAND (expr
, 0);
4209 /* Just an arbitrary value, FIXME. */
4210 return new_cost (target_spill_cost
[speed
], 0);
4213 if (op0
== NULL_TREE
4214 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4217 cost0
= force_expr_to_var_cost (op0
, speed
);
4219 if (op1
== NULL_TREE
4220 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4223 cost1
= force_expr_to_var_cost (op1
, speed
);
4225 mode
= TYPE_MODE (TREE_TYPE (expr
));
4226 switch (TREE_CODE (expr
))
4228 case POINTER_PLUS_EXPR
:
4232 cost
= new_cost (add_cost (speed
, mode
), 0);
4233 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4235 tree mult
= NULL_TREE
;
4237 if (TREE_CODE (op1
) == MULT_EXPR
)
4239 else if (TREE_CODE (op0
) == MULT_EXPR
)
4242 if (mult
!= NULL_TREE
4243 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4244 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4252 tree inner_mode
, outer_mode
;
4253 outer_mode
= TREE_TYPE (expr
);
4254 inner_mode
= TREE_TYPE (op0
);
4255 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4256 TYPE_MODE (inner_mode
), speed
), 0);
4261 if (cst_and_fits_in_hwi (op0
))
4262 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4264 else if (cst_and_fits_in_hwi (op1
))
4265 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4268 return new_cost (target_spill_cost
[speed
], 0);
4275 cost
= add_costs (cost
, cost0
);
4276 cost
= add_costs (cost
, cost1
);
4278 /* Bound the cost by target_spill_cost. The parts of complicated
4279 computations often are either loop invariant or at least can
4280 be shared between several iv uses, so letting this grow without
4281 limits would not give reasonable results. */
4282 if (cost
.cost
> (int) target_spill_cost
[speed
])
4283 cost
.cost
= target_spill_cost
[speed
];
4288 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4289 invariants the computation depends on. */
4292 force_var_cost (struct ivopts_data
*data
,
4293 tree expr
, bitmap
*depends_on
)
4297 fd_ivopts_data
= data
;
4298 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4301 return force_expr_to_var_cost (expr
, data
->speed
);
4304 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4305 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4306 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4307 invariants the computation depends on. */
4310 split_address_cost (struct ivopts_data
*data
,
4311 tree addr
, bool *symbol_present
, bool *var_present
,
4312 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4315 HOST_WIDE_INT bitsize
;
4316 HOST_WIDE_INT bitpos
;
4319 int unsignedp
, volatilep
;
4321 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4322 &unsignedp
, &volatilep
, false);
4325 || bitpos
% BITS_PER_UNIT
!= 0
4326 || TREE_CODE (core
) != VAR_DECL
)
4328 *symbol_present
= false;
4329 *var_present
= true;
4330 fd_ivopts_data
= data
;
4331 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4332 return new_cost (target_spill_cost
[data
->speed
], 0);
4335 *offset
+= bitpos
/ BITS_PER_UNIT
;
4336 if (TREE_STATIC (core
)
4337 || DECL_EXTERNAL (core
))
4339 *symbol_present
= true;
4340 *var_present
= false;
4344 *symbol_present
= false;
4345 *var_present
= true;
4349 /* Estimates cost of expressing difference of addresses E1 - E2 as
4350 var + symbol + offset. The value of offset is added to OFFSET,
4351 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4352 part is missing. DEPENDS_ON is a set of the invariants the computation
4356 ptr_difference_cost (struct ivopts_data
*data
,
4357 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4358 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4360 HOST_WIDE_INT diff
= 0;
4361 aff_tree aff_e1
, aff_e2
;
4364 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4366 if (ptr_difference_const (e1
, e2
, &diff
))
4369 *symbol_present
= false;
4370 *var_present
= false;
4374 if (integer_zerop (e2
))
4375 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4376 symbol_present
, var_present
, offset
, depends_on
);
4378 *symbol_present
= false;
4379 *var_present
= true;
4381 type
= signed_type_for (TREE_TYPE (e1
));
4382 tree_to_aff_combination (e1
, type
, &aff_e1
);
4383 tree_to_aff_combination (e2
, type
, &aff_e2
);
4384 aff_combination_scale (&aff_e2
, -1);
4385 aff_combination_add (&aff_e1
, &aff_e2
);
4387 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4390 /* Estimates cost of expressing difference E1 - E2 as
4391 var + symbol + offset. The value of offset is added to OFFSET,
4392 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4393 part is missing. DEPENDS_ON is a set of the invariants the computation
4397 difference_cost (struct ivopts_data
*data
,
4398 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4399 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4401 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4402 unsigned HOST_WIDE_INT off1
, off2
;
4403 aff_tree aff_e1
, aff_e2
;
4406 e1
= strip_offset (e1
, &off1
);
4407 e2
= strip_offset (e2
, &off2
);
4408 *offset
+= off1
- off2
;
4413 if (TREE_CODE (e1
) == ADDR_EXPR
)
4414 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4415 offset
, depends_on
);
4416 *symbol_present
= false;
4418 if (operand_equal_p (e1
, e2
, 0))
4420 *var_present
= false;
4424 *var_present
= true;
4426 if (integer_zerop (e2
))
4427 return force_var_cost (data
, e1
, depends_on
);
4429 if (integer_zerop (e1
))
4431 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4432 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4436 type
= signed_type_for (TREE_TYPE (e1
));
4437 tree_to_aff_combination (e1
, type
, &aff_e1
);
4438 tree_to_aff_combination (e2
, type
, &aff_e2
);
4439 aff_combination_scale (&aff_e2
, -1);
4440 aff_combination_add (&aff_e1
, &aff_e2
);
4442 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4445 /* Returns true if AFF1 and AFF2 are identical. */
4448 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4452 if (aff1
->n
!= aff2
->n
)
4455 for (i
= 0; i
< aff1
->n
; i
++)
4457 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4460 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4466 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4469 get_expr_id (struct ivopts_data
*data
, tree expr
)
4471 struct iv_inv_expr_ent ent
;
4472 struct iv_inv_expr_ent
**slot
;
4475 ent
.hash
= iterative_hash_expr (expr
, 0);
4476 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4480 *slot
= XNEW (struct iv_inv_expr_ent
);
4481 (*slot
)->expr
= expr
;
4482 (*slot
)->hash
= ent
.hash
;
4483 (*slot
)->id
= data
->inv_expr_id
++;
4487 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4488 requires a new compiler generated temporary. Returns -1 otherwise.
4489 ADDRESS_P is a flag indicating if the expression is for address
4493 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4494 tree cbase
, HOST_WIDE_INT ratio
,
4497 aff_tree ubase_aff
, cbase_aff
;
4505 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4506 && (TREE_CODE (cbase
) == INTEGER_CST
))
4509 /* Strips the constant part. */
4510 if (TREE_CODE (ubase
) == PLUS_EXPR
4511 || TREE_CODE (ubase
) == MINUS_EXPR
4512 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4514 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4515 ubase
= TREE_OPERAND (ubase
, 0);
4518 /* Strips the constant part. */
4519 if (TREE_CODE (cbase
) == PLUS_EXPR
4520 || TREE_CODE (cbase
) == MINUS_EXPR
4521 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4523 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4524 cbase
= TREE_OPERAND (cbase
, 0);
4529 if (((TREE_CODE (ubase
) == SSA_NAME
)
4530 || (TREE_CODE (ubase
) == ADDR_EXPR
4531 && is_gimple_min_invariant (ubase
)))
4532 && (TREE_CODE (cbase
) == INTEGER_CST
))
4535 if (((TREE_CODE (cbase
) == SSA_NAME
)
4536 || (TREE_CODE (cbase
) == ADDR_EXPR
4537 && is_gimple_min_invariant (cbase
)))
4538 && (TREE_CODE (ubase
) == INTEGER_CST
))
4544 if (operand_equal_p (ubase
, cbase
, 0))
4547 if (TREE_CODE (ubase
) == ADDR_EXPR
4548 && TREE_CODE (cbase
) == ADDR_EXPR
)
4552 usym
= TREE_OPERAND (ubase
, 0);
4553 csym
= TREE_OPERAND (cbase
, 0);
4554 if (TREE_CODE (usym
) == ARRAY_REF
)
4556 tree ind
= TREE_OPERAND (usym
, 1);
4557 if (TREE_CODE (ind
) == INTEGER_CST
4558 && tree_fits_shwi_p (ind
)
4559 && tree_to_shwi (ind
) == 0)
4560 usym
= TREE_OPERAND (usym
, 0);
4562 if (TREE_CODE (csym
) == ARRAY_REF
)
4564 tree ind
= TREE_OPERAND (csym
, 1);
4565 if (TREE_CODE (ind
) == INTEGER_CST
4566 && tree_fits_shwi_p (ind
)
4567 && tree_to_shwi (ind
) == 0)
4568 csym
= TREE_OPERAND (csym
, 0);
4570 if (operand_equal_p (usym
, csym
, 0))
4573 /* Now do more complex comparison */
4574 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4575 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4576 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4580 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4581 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4583 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4584 aff_combination_add (&ubase_aff
, &cbase_aff
);
4585 expr
= aff_combination_to_tree (&ubase_aff
);
4586 return get_expr_id (data
, expr
);
4591 /* Determines the cost of the computation by that USE is expressed
4592 from induction variable CAND. If ADDRESS_P is true, we just need
4593 to create an address from it, otherwise we want to get it into
4594 register. A set of invariants we depend on is stored in
4595 DEPENDS_ON. AT is the statement at that the value is computed.
4596 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4597 addressing is likely. */
4600 get_computation_cost_at (struct ivopts_data
*data
,
4601 struct iv_use
*use
, struct iv_cand
*cand
,
4602 bool address_p
, bitmap
*depends_on
, gimple
*at
,
4606 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4608 tree utype
= TREE_TYPE (ubase
), ctype
;
4609 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4610 HOST_WIDE_INT ratio
, aratio
;
4611 bool var_present
, symbol_present
, stmt_is_after_inc
;
4614 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4615 machine_mode mem_mode
= (address_p
4616 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4621 /* Only consider real candidates. */
4623 return infinite_cost
;
4625 cbase
= cand
->iv
->base
;
4626 cstep
= cand
->iv
->step
;
4627 ctype
= TREE_TYPE (cbase
);
4629 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4631 /* We do not have a precision to express the values of use. */
4632 return infinite_cost
;
4636 || (use
->iv
->base_object
4637 && cand
->iv
->base_object
4638 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4639 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4641 /* Do not try to express address of an object with computation based
4642 on address of a different object. This may cause problems in rtl
4643 level alias analysis (that does not expect this to be happening,
4644 as this is illegal in C), and would be unlikely to be useful
4646 if (use
->iv
->base_object
4647 && cand
->iv
->base_object
4648 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4649 return infinite_cost
;
4652 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4654 /* TODO -- add direct handling of this case. */
4658 /* CSTEPI is removed from the offset in case statement is after the
4659 increment. If the step is not constant, we use zero instead.
4660 This is a bit imprecise (there is the extra addition), but
4661 redundancy elimination is likely to transform the code so that
4662 it uses value of the variable before increment anyway,
4663 so it is not that much unrealistic. */
4664 if (cst_and_fits_in_hwi (cstep
))
4665 cstepi
= int_cst_value (cstep
);
4669 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4670 return infinite_cost
;
4672 if (wi::fits_shwi_p (rat
))
4673 ratio
= rat
.to_shwi ();
4675 return infinite_cost
;
4678 ctype
= TREE_TYPE (cbase
);
4680 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4682 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4683 or ratio == 1, it is better to handle this like
4685 ubase - ratio * cbase + ratio * var
4687 (also holds in the case ratio == -1, TODO. */
4689 if (cst_and_fits_in_hwi (cbase
))
4691 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4692 cost
= difference_cost (data
,
4693 ubase
, build_int_cst (utype
, 0),
4694 &symbol_present
, &var_present
, &offset
,
4696 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4698 else if (ratio
== 1)
4700 tree real_cbase
= cbase
;
4702 /* Check to see if any adjustment is needed. */
4703 if (cstepi
== 0 && stmt_is_after_inc
)
4705 aff_tree real_cbase_aff
;
4708 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4710 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4712 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4713 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4716 cost
= difference_cost (data
,
4718 &symbol_present
, &var_present
, &offset
,
4720 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4723 && !POINTER_TYPE_P (ctype
)
4724 && multiplier_allowed_in_address_p
4726 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4728 if (cstepi
== 0 && stmt_is_after_inc
)
4730 if (POINTER_TYPE_P (ctype
))
4731 cbase
= fold_build2 (POINTER_PLUS_EXPR
, ctype
, cbase
, cstep
);
4733 cbase
= fold_build2 (PLUS_EXPR
, ctype
, cbase
, cstep
);
4736 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4737 cost
= difference_cost (data
,
4739 &symbol_present
, &var_present
, &offset
,
4741 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4745 cost
= force_var_cost (data
, cbase
, depends_on
);
4746 cost
= add_costs (cost
,
4747 difference_cost (data
,
4748 ubase
, build_int_cst (utype
, 0),
4749 &symbol_present
, &var_present
,
4750 &offset
, depends_on
));
4751 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4752 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4755 /* Set of invariants depended on by sub use has already been computed
4756 for the first use in the group. */
4760 if (depends_on
&& *depends_on
)
4761 bitmap_clear (*depends_on
);
4763 else if (inv_expr_id
)
4766 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4767 /* Clear depends on. */
4768 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4769 bitmap_clear (*depends_on
);
4772 /* If we are after the increment, the value of the candidate is higher by
4774 if (stmt_is_after_inc
)
4775 offset
-= ratio
* cstepi
;
4777 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4778 (symbol/var1/const parts may be omitted). If we are looking for an
4779 address, find the cost of addressing this. */
4781 return add_costs (cost
,
4782 get_address_cost (symbol_present
, var_present
,
4783 offset
, ratio
, cstepi
,
4785 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4786 speed
, stmt_is_after_inc
,
4789 /* Otherwise estimate the costs for computing the expression. */
4790 if (!symbol_present
&& !var_present
&& !offset
)
4793 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4797 /* Symbol + offset should be compile-time computable so consider that they
4798 are added once to the variable, if present. */
4799 if (var_present
&& (symbol_present
|| offset
))
4800 cost
.cost
+= adjust_setup_cost (data
,
4801 add_cost (speed
, TYPE_MODE (ctype
)));
4803 /* Having offset does not affect runtime cost in case it is added to
4804 symbol, but it increases complexity. */
4808 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4810 aratio
= ratio
> 0 ? ratio
: -ratio
;
4812 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4817 *can_autoinc
= false;
4820 /* Just get the expression, expand it and measure the cost. */
4821 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4824 return infinite_cost
;
4827 comp
= build_simple_mem_ref (comp
);
4829 return new_cost (computation_cost (comp
, speed
), 0);
4833 /* Determines the cost of the computation by that USE is expressed
4834 from induction variable CAND. If ADDRESS_P is true, we just need
4835 to create an address from it, otherwise we want to get it into
4836 register. A set of invariants we depend on is stored in
4837 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4838 autoinc addressing is likely. */
4841 get_computation_cost (struct ivopts_data
*data
,
4842 struct iv_use
*use
, struct iv_cand
*cand
,
4843 bool address_p
, bitmap
*depends_on
,
4844 bool *can_autoinc
, int *inv_expr_id
)
4846 return get_computation_cost_at (data
,
4847 use
, cand
, address_p
, depends_on
, use
->stmt
,
4848 can_autoinc
, inv_expr_id
);
4851 /* Determines cost of basing replacement of USE on CAND in a generic
4855 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4856 struct iv_use
*use
, struct iv_cand
*cand
)
4860 int inv_expr_id
= -1;
4862 /* The simple case first -- if we need to express value of the preserved
4863 original biv, the cost is 0. This also prevents us from counting the
4864 cost of increment twice -- once at this use and once in the cost of
4866 if (cand
->pos
== IP_ORIGINAL
4867 && cand
->incremented_at
== use
->stmt
)
4869 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4874 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4875 NULL
, &inv_expr_id
);
4877 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4880 return !infinite_cost_p (cost
);
4883 /* Determines cost of basing replacement of USE on CAND in an address. */
4886 determine_use_iv_cost_address (struct ivopts_data
*data
,
4887 struct iv_use
*use
, struct iv_cand
*cand
)
4891 int inv_expr_id
= -1;
4892 struct iv_use
*sub_use
;
4894 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4895 &can_autoinc
, &inv_expr_id
);
4897 if (cand
->ainc_use
== use
)
4900 cost
.cost
-= cand
->cost_step
;
4901 /* If we generated the candidate solely for exploiting autoincrement
4902 opportunities, and it turns out it can't be used, set the cost to
4903 infinity to make sure we ignore it. */
4904 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4905 cost
= infinite_cost
;
4907 for (sub_use
= use
->next
;
4908 sub_use
&& !infinite_cost_p (cost
);
4909 sub_use
= sub_use
->next
)
4911 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4912 &can_autoinc
, &inv_expr_id
);
4913 cost
= add_costs (cost
, sub_cost
);
4916 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4919 return !infinite_cost_p (cost
);
4922 /* Computes value of candidate CAND at position AT in iteration NITER, and
4923 stores it to VAL. */
4926 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
4929 aff_tree step
, delta
, nit
;
4930 struct iv
*iv
= cand
->iv
;
4931 tree type
= TREE_TYPE (iv
->base
);
4932 tree steptype
= type
;
4933 if (POINTER_TYPE_P (type
))
4934 steptype
= sizetype
;
4935 steptype
= unsigned_type_for (type
);
4937 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4938 aff_combination_convert (&step
, steptype
);
4939 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4940 aff_combination_convert (&nit
, steptype
);
4941 aff_combination_mult (&nit
, &step
, &delta
);
4942 if (stmt_after_increment (loop
, cand
, at
))
4943 aff_combination_add (&delta
, &step
);
4945 tree_to_aff_combination (iv
->base
, type
, val
);
4946 if (!POINTER_TYPE_P (type
))
4947 aff_combination_convert (val
, steptype
);
4948 aff_combination_add (val
, &delta
);
4951 /* Returns period of induction variable iv. */
4954 iv_period (struct iv
*iv
)
4956 tree step
= iv
->step
, period
, type
;
4959 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4961 type
= unsigned_type_for (TREE_TYPE (step
));
4962 /* Period of the iv is lcm (step, type_range)/step -1,
4963 i.e., N*type_range/step - 1. Since type range is power
4964 of two, N == (step >> num_of_ending_zeros_binary (step),
4965 so the final result is
4967 (type_range >> num_of_ending_zeros_binary (step)) - 1
4970 pow2div
= num_ending_zeros (step
);
4972 period
= build_low_bits_mask (type
,
4973 (TYPE_PRECISION (type
)
4974 - tree_to_uhwi (pow2div
)));
4979 /* Returns the comparison operator used when eliminating the iv USE. */
4981 static enum tree_code
4982 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4984 struct loop
*loop
= data
->current_loop
;
4988 ex_bb
= gimple_bb (use
->stmt
);
4989 exit
= EDGE_SUCC (ex_bb
, 0);
4990 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4991 exit
= EDGE_SUCC (ex_bb
, 1);
4993 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4996 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4997 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4998 calculation is performed in non-wrapping type.
5000 TODO: More generally, we could test for the situation that
5001 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5002 This would require knowing the sign of OFFSET. */
5005 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
5007 enum tree_code code
;
5009 aff_tree aff_e1
, aff_e2
, aff_offset
;
5011 if (!nowrap_type_p (TREE_TYPE (base
)))
5014 base
= expand_simple_operations (base
);
5016 if (TREE_CODE (base
) == SSA_NAME
)
5018 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
5020 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
5023 code
= gimple_assign_rhs_code (stmt
);
5024 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5027 e1
= gimple_assign_rhs1 (stmt
);
5028 e2
= gimple_assign_rhs2 (stmt
);
5032 code
= TREE_CODE (base
);
5033 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5035 e1
= TREE_OPERAND (base
, 0);
5036 e2
= TREE_OPERAND (base
, 1);
5039 /* Use affine expansion as deeper inspection to prove the equality. */
5040 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
5041 &aff_e2
, &data
->name_expansion_cache
);
5042 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
5043 &aff_offset
, &data
->name_expansion_cache
);
5044 aff_combination_scale (&aff_offset
, -1);
5048 aff_combination_add (&aff_e2
, &aff_offset
);
5049 if (aff_combination_zero_p (&aff_e2
))
5052 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
5053 &aff_e1
, &data
->name_expansion_cache
);
5054 aff_combination_add (&aff_e1
, &aff_offset
);
5055 return aff_combination_zero_p (&aff_e1
);
5057 case POINTER_PLUS_EXPR
:
5058 aff_combination_add (&aff_e2
, &aff_offset
);
5059 return aff_combination_zero_p (&aff_e2
);
5066 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5067 comparison with CAND. NITER describes the number of iterations of
5068 the loops. If successful, the comparison in COMP_P is altered accordingly.
5070 We aim to handle the following situation:
5086 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5087 We aim to optimize this to
5095 while (p < p_0 - a + b);
5097 This preserves the correctness, since the pointer arithmetics does not
5098 overflow. More precisely:
5100 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5101 overflow in computing it or the values of p.
5102 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5103 overflow. To prove this, we use the fact that p_0 = base + a. */
5106 iv_elimination_compare_lt (struct ivopts_data
*data
,
5107 struct iv_cand
*cand
, enum tree_code
*comp_p
,
5108 struct tree_niter_desc
*niter
)
5110 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
5111 struct aff_tree nit
, tmpa
, tmpb
;
5112 enum tree_code comp
;
5115 /* We need to know that the candidate induction variable does not overflow.
5116 While more complex analysis may be used to prove this, for now just
5117 check that the variable appears in the original program and that it
5118 is computed in a type that guarantees no overflows. */
5119 cand_type
= TREE_TYPE (cand
->iv
->base
);
5120 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
5123 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5124 the calculation of the BOUND could overflow, making the comparison
5126 if (!data
->loop_single_exit_p
)
5129 /* We need to be able to decide whether candidate is increasing or decreasing
5130 in order to choose the right comparison operator. */
5131 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5133 step
= int_cst_value (cand
->iv
->step
);
5135 /* Check that the number of iterations matches the expected pattern:
5136 a + 1 > b ? 0 : b - a - 1. */
5137 mbz
= niter
->may_be_zero
;
5138 if (TREE_CODE (mbz
) == GT_EXPR
)
5140 /* Handle a + 1 > b. */
5141 tree op0
= TREE_OPERAND (mbz
, 0);
5142 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5144 a
= TREE_OPERAND (op0
, 0);
5145 b
= TREE_OPERAND (mbz
, 1);
5150 else if (TREE_CODE (mbz
) == LT_EXPR
)
5152 tree op1
= TREE_OPERAND (mbz
, 1);
5154 /* Handle b < a + 1. */
5155 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5157 a
= TREE_OPERAND (op1
, 0);
5158 b
= TREE_OPERAND (mbz
, 0);
5166 /* Expected number of iterations is B - A - 1. Check that it matches
5167 the actual number, i.e., that B - A - NITER = 1. */
5168 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5169 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5170 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5171 aff_combination_scale (&nit
, -1);
5172 aff_combination_scale (&tmpa
, -1);
5173 aff_combination_add (&tmpb
, &tmpa
);
5174 aff_combination_add (&tmpb
, &nit
);
5175 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
5178 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5180 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5182 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5183 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5186 /* Determine the new comparison operator. */
5187 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5188 if (*comp_p
== NE_EXPR
)
5190 else if (*comp_p
== EQ_EXPR
)
5191 *comp_p
= invert_tree_comparison (comp
, false);
5198 /* Check whether it is possible to express the condition in USE by comparison
5199 of candidate CAND. If so, store the value compared with to BOUND, and the
5200 comparison operator to COMP. */
5203 may_eliminate_iv (struct ivopts_data
*data
,
5204 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5205 enum tree_code
*comp
)
5210 struct loop
*loop
= data
->current_loop
;
5212 struct tree_niter_desc
*desc
= NULL
;
5214 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5217 /* For now works only for exits that dominate the loop latch.
5218 TODO: extend to other conditions inside loop body. */
5219 ex_bb
= gimple_bb (use
->stmt
);
5220 if (use
->stmt
!= last_stmt (ex_bb
)
5221 || gimple_code (use
->stmt
) != GIMPLE_COND
5222 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5225 exit
= EDGE_SUCC (ex_bb
, 0);
5226 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5227 exit
= EDGE_SUCC (ex_bb
, 1);
5228 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5231 desc
= niter_for_exit (data
, exit
);
5235 /* Determine whether we can use the variable to test the exit condition.
5236 This is the case iff the period of the induction variable is greater
5237 than the number of iterations for which the exit condition is true. */
5238 period
= iv_period (cand
->iv
);
5240 /* If the number of iterations is constant, compare against it directly. */
5241 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5243 /* See cand_value_at. */
5244 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5246 if (!tree_int_cst_lt (desc
->niter
, period
))
5251 if (tree_int_cst_lt (period
, desc
->niter
))
5256 /* If not, and if this is the only possible exit of the loop, see whether
5257 we can get a conservative estimate on the number of iterations of the
5258 entire loop and compare against that instead. */
5261 widest_int period_value
, max_niter
;
5263 max_niter
= desc
->max
;
5264 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5266 period_value
= wi::to_widest (period
);
5267 if (wi::gtu_p (max_niter
, period_value
))
5269 /* See if we can take advantage of inferred loop bound information. */
5270 if (data
->loop_single_exit_p
)
5272 if (!max_loop_iterations (loop
, &max_niter
))
5274 /* The loop bound is already adjusted by adding 1. */
5275 if (wi::gtu_p (max_niter
, period_value
))
5283 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5285 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5286 aff_combination_to_tree (&bnd
));
5287 *comp
= iv_elimination_compare (data
, use
);
5289 /* It is unlikely that computing the number of iterations using division
5290 would be more profitable than keeping the original induction variable. */
5291 if (expression_expensive_p (*bound
))
5294 /* Sometimes, it is possible to handle the situation that the number of
5295 iterations may be zero unless additional assumtions by using <
5296 instead of != in the exit condition.
5298 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5299 base the exit condition on it. However, that is often too
5301 if (!integer_zerop (desc
->may_be_zero
))
5302 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5307 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5308 be copied, if it is used in the loop body and DATA->body_includes_call. */
5311 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5313 tree sbound
= bound
;
/* Strip conversions so a casted parameter is still recognized.  */
5314 STRIP_NOPS (sbound
);
/* Charge one instruction when BOUND is a default-def SSA name of a
   PARM_DECL and the loop body contains a call; the zero-cost return for
   the other case is elided in this extract.  */
5316 if (TREE_CODE (sbound
) == SSA_NAME
5317 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5318 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5319 && data
->body_includes_call
)
5320 return COSTS_N_INSNS (1);
/* NOTE(review): decimated extract — gaps in the embedded numbering
   (e.g. 5341-5342, 5344-5347) hide declarations and early returns.
   Code kept byte-identical; comments only.  */
5325 /* Determines cost of basing replacement of USE on CAND in a condition. */
5328 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5329 struct iv_use
*use
, struct iv_cand
*cand
)
5331 tree bound
= NULL_TREE
;
5333 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5334 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5336 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5337 tree
*control_var
, *bound_cst
;
5338 enum tree_code comp
= ERROR_MARK
;
5340 /* Only consider real candidates. */
5343 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5348 /* Try iv elimination. */
5349 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5351 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
/* A zero cost from force_var_cost may still hide a required parameter
   copy; account for it via parm_decl_cost.  */
5352 if (elim_cost
.cost
== 0)
5353 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5354 else if (TREE_CODE (bound
) == INTEGER_CST
)
5356 /* If we replace a loop condition 'i < n' with 'p < base + n',
5357 depends_on_elim will have 'base' and 'n' set, which implies
5358 that both 'base' and 'n' will be live during the loop. More likely,
5359 'base + n' will be loop invariant, resulting in only one live value
5360 during the loop. So in that case we clear depends_on_elim and set
5361 elim_inv_expr_id instead. */
5362 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5364 elim_inv_expr_id
= get_expr_id (data
, bound
);
5365 bitmap_clear (depends_on_elim
);
5367 /* The bound is a loop invariant, so it will be only computed
5369 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
/* Elimination not possible: make it never win the comparison below.  */
5372 elim_cost
= infinite_cost
;
5374 /* Try expressing the original giv. If it is compared with an invariant,
5375 note that we cannot get rid of it. */
5376 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5380 /* When the condition is a comparison of the candidate IV against
5381 zero, prefer this IV.
5383 TODO: The constant that we're subtracting from the cost should
5384 be target-dependent. This information should be added to the
5385 target costs for each backend. */
5386 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5387 && integer_zerop (*bound_cst
)
5388 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5389 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5390 elim_cost
.cost
-= 1;
5392 express_cost
= get_computation_cost (data
, use
, cand
, false,
5393 &depends_on_express
, NULL
,
5394 &express_inv_expr_id
);
5395 fd_ivopts_data
= data
;
5396 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5398 /* Count the cost of the original bound as well. */
5399 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5400 if (bound_cost
.cost
== 0)
5401 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5402 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5403 bound_cost
.cost
= 0;
5404 express_cost
.cost
+= bound_cost
.cost
;
5406 /* Choose the better approach, preferring the eliminated IV. */
5407 if (compare_costs (elim_cost
, express_cost
) <= 0)
5410 depends_on
= depends_on_elim
;
5411 depends_on_elim
= NULL
;
5412 inv_expr_id
= elim_inv_expr_id
;
5416 cost
= express_cost
;
5417 depends_on
= depends_on_express
;
5418 depends_on_express
= NULL
;
5421 inv_expr_id
= express_inv_expr_id
;
5424 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
/* Ownership of the surviving bitmap passed to set_use_iv_cost; free the
   one that was not chosen (it was NULLed out above if transferred).  */
5426 if (depends_on_elim
)
5427 BITMAP_FREE (depends_on_elim
);
5428 if (depends_on_express
)
5429 BITMAP_FREE (depends_on_express
);
5431 return !infinite_cost_p (cost
);
5434 /* Determines cost of basing replacement of USE on CAND. Returns false
5435 if USE cannot be based on CAND. */
5438 determine_use_iv_cost (struct ivopts_data
*data
,
5439 struct iv_use
*use
, struct iv_cand
*cand
)
/* Dispatch on the kind of use; the switch header and the other case
   labels (address, compare) are elided in this extract.  */
5443 case USE_NONLINEAR_EXPR
:
5444 return determine_use_iv_cost_generic (data
, use
, cand
);
5447 return determine_use_iv_cost_address (data
, use
, cand
);
5450 return determine_use_iv_cost_condition (data
, use
, cand
);
5457 /* Return true if get_computation_cost indicates that autoincrement is
5458 a possibility for the pair of USE and CAND, false otherwise. */
5461 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5462 struct iv_cand
*cand
)
/* Autoincrement only makes sense for address uses; the early return for
   other use kinds follows (body elided here).  */
5468 if (use
->type
!= USE_ADDRESS
)
5471 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5472 &can_autoinc
, NULL
);
5474 BITMAP_FREE (depends_on
);
5476 return !infinite_cost_p (cost
) && can_autoinc
;
5479 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5480 use that allows autoincrement, and set their AINC_USE if possible. */
5483 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5487 for (i
= 0; i
< n_iv_cands (data
); i
++)
5489 struct iv_cand
*cand
= iv_cand (data
, i
);
5490 struct iv_use
*closest_before
= NULL
;
5491 struct iv_use
*closest_after
= NULL
;
5492 if (cand
->pos
!= IP_ORIGINAL
)
/* Scan all uses in the same basic block as the candidate's increment,
   tracking the nearest use before and after it by statement UID.  */
5495 for (j
= 0; j
< n_iv_uses (data
); j
++)
5497 struct iv_use
*use
= iv_use (data
, j
);
5498 unsigned uid
= gimple_uid (use
->stmt
);
5500 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5503 if (uid
< gimple_uid (cand
->incremented_at
)
5504 && (closest_before
== NULL
5505 || uid
> gimple_uid (closest_before
->stmt
)))
5506 closest_before
= use
;
5508 if (uid
> gimple_uid (cand
->incremented_at
)
5509 && (closest_after
== NULL
5510 || uid
< gimple_uid (closest_after
->stmt
)))
5511 closest_after
= use
;
/* Prefer the use just before the increment; fall back to the one just
   after.  */
5514 if (closest_before
!= NULL
5515 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5516 cand
->ainc_use
= closest_before
;
5517 else if (closest_after
!= NULL
5518 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5519 cand
->ainc_use
= closest_after
;
5523 /* Finds the candidates for the induction variables. */
5526 find_iv_candidates (struct ivopts_data
*data
)
5528 /* Add commonly used ivs. */
5529 add_standard_iv_candidates (data
);
5531 /* Add old induction variables. */
5532 add_iv_candidate_for_bivs (data
);
5534 /* Add induction variables derived from uses. */
5535 add_iv_candidate_for_uses (data
);
/* Link IP_ORIGINAL candidates with adjacent autoincrement uses.  */
5537 set_autoinc_for_original_candidates (data
);
5539 /* Record the important candidates. */
5540 record_important_candidates (data
);
/* NOTE(review): decimated extract — several statements and braces are
   elided (embedded numbering jumps).  Code kept byte-identical.  */
5543 /* Determines costs of basing the use of the iv on an iv candidate. */
5546 determine_use_iv_costs (struct ivopts_data
*data
)
5550 struct iv_cand
*cand
;
5551 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5553 alloc_use_cost_map (data
);
5555 for (i
= 0; i
< n_iv_uses (data
); i
++)
5557 use
= iv_use (data
, i
);
/* Either cost every (use, cand) pair, or only the related candidates,
   depending on consider_all_candidates.  */
5559 if (data
->consider_all_candidates
)
5561 for (j
= 0; j
< n_iv_cands (data
); j
++)
5563 cand
= iv_cand (data
, j
);
5564 determine_use_iv_cost (data
, use
, cand
);
5571 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5573 cand
= iv_cand (data
, j
);
5574 if (!determine_use_iv_cost (data
, use
, cand
))
5575 bitmap_set_bit (to_clear
, j
);
5578 /* Remove the candidates for that the cost is infinite from
5579 the list of related candidates. */
5580 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5581 bitmap_clear (to_clear
);
5585 BITMAP_FREE (to_clear
);
/* Dump the full use/candidate cost table when detailed dumping is on.  */
5587 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5589 fprintf (dump_file
, "Use-candidate costs:\n");
5591 for (i
= 0; i
< n_iv_uses (data
); i
++)
5593 use
= iv_use (data
, i
);
5595 fprintf (dump_file
, "Use %d:\n", i
);
5596 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5597 for (j
= 0; j
< use
->n_map_members
; j
++)
5599 if (!use
->cost_map
[j
].cand
5600 || infinite_cost_p (use
->cost_map
[j
].cost
))
5603 fprintf (dump_file
, " %d\t%d\t%d\t",
5604 use
->cost_map
[j
].cand
->id
,
5605 use
->cost_map
[j
].cost
.cost
,
5606 use
->cost_map
[j
].cost
.complexity
);
5607 if (use
->cost_map
[j
].depends_on
)
5608 bitmap_print (dump_file
,
5609 use
->cost_map
[j
].depends_on
, "","");
5610 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5611 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5612 fprintf (dump_file
, "\n");
5615 fprintf (dump_file
, "\n");
5617 fprintf (dump_file
, "\n");
5621 /* Determines cost of the candidate CAND. */
5624 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5626 comp_cost cost_base
;
5627 unsigned cost
, cost_step
;
5636 /* There are two costs associated with the candidate -- its increment
5637 and its initialization. The second is almost negligible for any loop
5638 that rolls enough, so we take it just very little into account. */
5640 base
= cand
->iv
->base
;
5641 cost_base
= force_var_cost (data
, base
, NULL
);
5642 /* It will be exceptional that the iv register happens to be initialized with
5643 the proper value at no cost. In general, there will at least be a regcopy
5645 if (cost_base
.cost
== 0)
5646 cost_base
.cost
= COSTS_N_INSNS (1);
/* Per-iteration cost is one addition in the iv's mode.  */
5647 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5649 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5651 /* Prefer the original ivs unless we may gain something by replacing it.
5652 The reason is to make debugging simpler; so this is not relevant for
5653 artificial ivs created by other optimization passes. */
5654 if (cand
->pos
!= IP_ORIGINAL
5655 || !SSA_NAME_VAR (cand
->var_before
)
5656 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5659 /* Prefer not to insert statements into latch unless there are some
5660 already (so that we do not create unnecessary jumps). */
5661 if (cand
->pos
== IP_END
5662 && empty_block_p (ip_end_pos (data
->current_loop
)))
5666 cand
->cost_step
= cost_step
;
5669 /* Determines costs of computation of the candidates. */
5672 determine_iv_costs (struct ivopts_data
*data
)
5676 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5678 fprintf (dump_file
, "Candidate costs:\n");
5679 fprintf (dump_file
, " cand\tcost\n");
/* Cost every candidate, dumping each result under -fdump-...-details.  */
5682 for (i
= 0; i
< n_iv_cands (data
); i
++)
5684 struct iv_cand
*cand
= iv_cand (data
, i
);
5686 determine_iv_cost (data
, cand
);
5688 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5689 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5692 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5693 fprintf (dump_file
, "\n");
5696 /* Calculates cost for having SIZE induction variables. */
5699 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5701 /* We add size to the cost, so that we prefer eliminating ivs
5703 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5704 data
->body_includes_call
);
/* NOTE(review): decimated extract — the counting of N and phi fetch are
   partly elided.  Code kept byte-identical; comments only.  */
5707 /* For each size of the induction variable set determine the penalty. */
5710 determine_set_costs (struct ivopts_data
*data
)
5716 struct loop
*loop
= data
->current_loop
;
5719 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5721 fprintf (dump_file
, "Global costs:\n");
5722 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5723 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5724 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5725 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
/* Walk loop-header PHIs; skip virtual operands and recognized ivs when
   estimating register pressure (the counting statements are elided).  */
5729 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5732 op
= PHI_RESULT (phi
);
5734 if (virtual_operand_p (op
))
5737 if (get_iv (data
, op
))
/* Also count invariants with nonlinear uses among the relevant names.  */
5743 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5745 struct version_info
*info
= ver_info (data
, j
);
5747 if (info
->inv_id
&& info
->has_nonlin_use
)
5751 data
->regs_used
= n
;
5752 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5753 fprintf (dump_file
, " regs_used %d\n", n
);
5755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5757 fprintf (dump_file
, " cost for size:\n");
5758 fprintf (dump_file
, " ivs\tcost\n");
5759 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5760 fprintf (dump_file
, " %d\t%d\n", j
,
5761 ivopts_global_cost_for_size (data
, j
));
5762 fprintf (dump_file
, "\n");
5766 /* Returns true if A is a cheaper cost pair than B. */
5769 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
/* Primary key: the pair's cost (NULL handling elided in this extract).  */
5779 cmp
= compare_costs (a
->cost
, b
->cost
);
5786 /* In case the costs are the same, prefer the cheaper candidate. */
5787 if (a
->cand
->cost
< b
->cand
->cost
)
5794 /* Returns candidate by that USE is expressed in IVS. */
5796 static struct cost_pair
*
5797 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5799 return ivs
->cand_for_use
[use
->id
];
5802 /* Computes the cost field of IVS structure. */
5805 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
/* Total = per-use costs + per-candidate costs + global register-pressure
   penalty for the registers the set occupies.  */
5807 comp_cost cost
= ivs
->cand_use_cost
;
5809 cost
.cost
+= ivs
->cand_cost
;
5811 cost
.cost
+= ivopts_global_cost_for_size (data
,
5812 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5817 /* Remove invariants in set INVS to set IVS. */
5820 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
/* Decrement the reference count of each invariant; the adjustment of the
   register count when it drops to zero is elided in this extract.  */
5828 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5830 ivs
->n_invariant_uses
[iid
]--;
5831 if (ivs
->n_invariant_uses
[iid
] == 0)
5836 /* Set USE not to be expressed by any candidate in IVS. */
5839 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5842 unsigned uid
= use
->id
, cid
;
5843 struct cost_pair
*cp
;
5845 cp
= ivs
->cand_for_use
[uid
];
/* Detach the cost pair and drop the candidate's reference count.  */
5851 ivs
->cand_for_use
[uid
] = NULL
;
5852 ivs
->n_cand_uses
[cid
]--;
/* Last use of the candidate gone: remove the candidate itself from the
   set, together with its cost and its invariant dependencies.  */
5854 if (ivs
->n_cand_uses
[cid
] == 0)
5856 bitmap_clear_bit (ivs
->cands
, cid
);
5857 /* Do not count the pseudocandidates. */
5861 ivs
->cand_cost
-= cp
->cand
->cost
;
5863 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5866 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5868 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5870 if (cp
->inv_expr_id
!= -1)
5872 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5873 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5874 ivs
->num_used_inv_expr
--;
5876 iv_ca_recount_cost (data
, ivs
);
5879 /* Add invariants in set INVS to set IVS. */
5882 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
/* Mirror of iv_ca_set_remove_invariants: bump each invariant's count;
   the register-count update on first use is elided in this extract.  */
5890 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5892 ivs
->n_invariant_uses
[iid
]++;
5893 if (ivs
->n_invariant_uses
[iid
] == 1)
5898 /* Set cost pair for USE in set IVS to CP. */
5901 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5902 struct iv_use
*use
, struct cost_pair
*cp
)
5904 unsigned uid
= use
->id
, cid
;
/* No-op when the use is already expressed by CP.  */
5906 if (ivs
->cand_for_use
[uid
] == cp
)
/* Remove the previous pairing first so counts stay consistent.  */
5909 if (ivs
->cand_for_use
[uid
])
5910 iv_ca_set_no_cp (data
, ivs
, use
);
5917 ivs
->cand_for_use
[uid
] = cp
;
5918 ivs
->n_cand_uses
[cid
]++;
/* First use of this candidate: add it to the set with its cost and its
   invariant dependencies.  */
5919 if (ivs
->n_cand_uses
[cid
] == 1)
5921 bitmap_set_bit (ivs
->cands
, cid
);
5922 /* Do not count the pseudocandidates. */
5926 ivs
->cand_cost
+= cp
->cand
->cost
;
5928 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5931 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5932 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5934 if (cp
->inv_expr_id
!= -1)
5936 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5937 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5938 ivs
->num_used_inv_expr
++;
5940 iv_ca_recount_cost (data
, ivs
);
5944 /* Extend set IVS by expressing USE by some of the candidates in it
5945 if possible. Consider all important candidates if candidates in
5946 set IVS don't give any result. */
5949 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5952 struct cost_pair
*best_cp
= NULL
, *cp
;
5955 struct iv_cand
*cand
;
5957 gcc_assert (ivs
->upto
>= use
->id
);
/* First pass: candidates already in the set.  */
5961 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5963 cand
= iv_cand (data
, i
);
5964 cp
= get_use_iv_cost (data
, use
, cand
);
5965 if (cheaper_cost_pair (cp
, best_cp
))
/* Second pass: fall back to the important candidates if nothing in the
   current set can express the use.  */
5969 if (best_cp
== NULL
)
5971 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5973 cand
= iv_cand (data
, i
);
5974 cp
= get_use_iv_cost (data
, use
, cand
);
5975 if (cheaper_cost_pair (cp
, best_cp
))
5980 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5983 /* Get cost for assignment IVS. */
5986 iv_ca_cost (struct iv_ca
*ivs
)
5988 /* This was a conditional expression but it triggered a bug in
/* Returns infinite_cost when the set has bad uses; the non-infinite
   branch returning ivs->cost is elided in this extract.  */
5991 return infinite_cost
;
5996 /* Returns true if all dependences of CP are among invariants in IVS. */
5999 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
/* A pair with no dependences trivially qualifies.  */
6004 if (!cp
->depends_on
)
/* Any dependence not currently counted in the set disqualifies CP.  */
6007 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
6009 if (ivs
->n_invariant_uses
[i
] == 0)
6016 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6017 it before NEXT_CHANGE. */
6019 static struct iv_ca_delta
*
6020 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
6021 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
/* Caller owns the returned node; iv_ca_delta_free releases the chain.  */
6023 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
6026 change
->old_cp
= old_cp
;
6027 change
->new_cp
= new_cp
;
6028 change
->next_change
= next_change
;
6033 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6036 static struct iv_ca_delta
*
6037 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6039 struct iv_ca_delta
*last
;
/* Walk to the tail of L1 and splice L2 on (empty-list shortcuts are
   elided in this extract).  */
6047 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
6049 last
->next_change
= l2
;
6054 /* Reverse the list of changes DELTA, forming the inverse to it. */
6056 static struct iv_ca_delta
*
6057 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6059 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
/* Standard in-place list reversal ...  */
6061 for (act
= delta
; act
; act
= next
)
6063 next
= act
->next_change
;
6064 act
->next_change
= prev
;
/* ... additionally swapping old/new in each node so the reversed list
   describes the inverse transformation.  */
6067 std::swap (act
->old_cp
, act
->new_cp
);
6073 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6074 reverted instead. */
6077 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6078 struct iv_ca_delta
*delta
, bool forward
)
6080 struct cost_pair
*from
, *to
;
6081 struct iv_ca_delta
*act
;
/* Reverting = applying the reversed/inverted list.  */
6084 delta
= iv_ca_delta_reverse (delta
);
6086 for (act
= delta
; act
; act
= act
->next_change
)
6090 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
6091 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
/* Restore DELTA's original order for the caller.  */
6095 iv_ca_delta_reverse (delta
);
6098 /* Returns true if CAND is used in IVS. */
6101 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6103 return ivs
->n_cand_uses
[cand
->id
] > 0;
6106 /* Returns number of induction variable candidates in the set IVS. */
6109 iv_ca_n_cands (struct iv_ca
*ivs
)
6111 return ivs
->n_cands
;
6114 /* Free the list of changes DELTA. */
6117 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6119 struct iv_ca_delta
*act
, *next
;
/* Cache the successor before each node is released.  */
6121 for (act
= *delta
; act
; act
= next
)
6123 next
= act
->next_change
;
6130 /* Allocates new iv candidates assignment. */
6132 static struct iv_ca
*
6133 iv_ca_new (struct ivopts_data
*data
)
6135 struct iv_ca
*nw
= XNEW (struct iv_ca
);
/* All count arrays are zero-initialized (XCNEWVEC); the set starts
   empty with no candidates, uses or invariants.  */
6139 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
6140 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
6141 nw
->cands
= BITMAP_ALLOC (NULL
);
6144 nw
->cand_use_cost
= no_cost
;
6146 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
6148 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
6149 nw
->num_used_inv_expr
= 0;
6154 /* Free memory occupied by the set IVS. */
6157 iv_ca_free (struct iv_ca
**ivs
)
/* Releases each array allocated by iv_ca_new, then (elided here) the
   struct itself; takes **ivs so the caller's pointer can be cleared.  */
6159 free ((*ivs
)->cand_for_use
);
6160 free ((*ivs
)->n_cand_uses
);
6161 BITMAP_FREE ((*ivs
)->cands
);
6162 free ((*ivs
)->n_invariant_uses
);
6163 free ((*ivs
)->used_inv_expr
);
6168 /* Dumps IVS to FILE. */
6171 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6173 const char *pref
= " invariants ";
6175 comp_cost cost
= iv_ca_cost (ivs
);
6177 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
6178 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6179 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
6180 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
/* Per-use line: which candidate expresses it and at what cost; "??"
   marks a use with no candidate assigned.  */
6182 for (i
= 0; i
< ivs
->upto
; i
++)
6184 struct iv_use
*use
= iv_use (data
, i
);
6185 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6187 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6188 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6190 fprintf (file
, " use:%d --> ??\n", use
->id
);
/* List the invariant ids currently referenced by the set.  */
6193 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6194 if (ivs
->n_invariant_uses
[i
])
6196 fprintf (file
, "%s%d", pref
, i
);
6199 fprintf (file
, "\n\n");
6202 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6203 new set, and store differences in DELTA. Number of induction variables
6204 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6205 the function will try to find a solution with mimimal iv candidates. */
6208 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6209 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6210 unsigned *n_ivs
, bool min_ncand
)
6215 struct cost_pair
*old_cp
, *new_cp
;
6218 for (i
= 0; i
< ivs
->upto
; i
++)
6220 use
= iv_use (data
, i
);
6221 old_cp
= iv_ca_cand_for_use (ivs
, use
);
/* Skip uses already expressed by CAND.  */
6224 && old_cp
->cand
== cand
)
6227 new_cp
= get_use_iv_cost (data
, use
, cand
);
/* Unless minimizing the candidate count, only switch a use when CAND's
   dependences are satisfied and the new pair is strictly cheaper.  */
6231 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6234 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6237 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
/* Apply the delta just to measure cost, then roll it back; the caller
   decides whether to commit.  */
6240 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6241 cost
= iv_ca_cost (ivs
);
6243 *n_ivs
= iv_ca_n_cands (ivs
);
6244 iv_ca_delta_commit (data
, ivs
, *delta
, false);
/* NOTE(review): decimated extract — brace structure and some statements
   are elided.  Code kept byte-identical; comments only.  */
6249 /* Try narrowing set IVS by removing CAND. Return the cost of
6250 the new set and store the differences in DELTA. START is
6251 the candidate with which we start narrowing. */
6254 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6255 struct iv_cand
*cand
, struct iv_cand
*start
,
6256 struct iv_ca_delta
**delta
)
6260 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6262 struct iv_cand
*cnd
;
6263 comp_cost cost
, best_cost
, acost
;
/* For every use currently expressed by CAND, find the cheapest
   replacement candidate.  */
6266 for (i
= 0; i
< n_iv_uses (data
); i
++)
6268 use
= iv_use (data
, i
);
6270 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6271 if (old_cp
->cand
!= cand
)
6274 best_cost
= iv_ca_cost (ivs
);
6275 /* Start narrowing with START. */
6276 new_cp
= get_use_iv_cost (data
, use
, start
);
6278 if (data
->consider_all_candidates
)
6280 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
/* Never re-try the candidate being removed, nor START (already the
   baseline above).  */
6282 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6285 cnd
= iv_cand (data
, ci
);
6287 cp
= get_use_iv_cost (data
, use
, cnd
);
/* Trial-assign and measure the whole-set cost.  */
6291 iv_ca_set_cp (data
, ivs
, use
, cp
);
6292 acost
= iv_ca_cost (ivs
);
6294 if (compare_costs (acost
, best_cost
) < 0)
/* Restricted search: only candidates related to the use that are also
   in the current set.  */
6303 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6305 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6308 cnd
= iv_cand (data
, ci
);
6310 cp
= get_use_iv_cost (data
, use
, cnd
);
6314 iv_ca_set_cp (data
, ivs
, use
, cp
);
6315 acost
= iv_ca_cost (ivs
);
6317 if (compare_costs (acost
, best_cost
) < 0)
6324 /* Restore to old cp for use. */
6325 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
/* No replacement found for some use: narrowing fails.  */
6329 iv_ca_delta_free (delta
);
6330 return infinite_cost
;
6333 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
/* Measure the narrowed set's cost, then roll the delta back.  */
6336 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6337 cost
= iv_ca_cost (ivs
);
6338 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6343 /* Try optimizing the set of candidates IVS by removing candidates different
6344 from to EXCEPT_CAND from it. Return cost of the new set, and store
6345 differences in DELTA. */
6348 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6349 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6352 struct iv_ca_delta
*act_delta
, *best_delta
;
6354 comp_cost best_cost
, acost
;
6355 struct iv_cand
*cand
;
6358 best_cost
= iv_ca_cost (ivs
);
/* Try removing each candidate in turn (except EXCEPT_CAND), keeping the
   delta that yields the cheapest set.  */
6360 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6362 cand
= iv_cand (data
, i
);
6364 if (cand
== except_cand
)
6367 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6369 if (compare_costs (acost
, best_cost
) < 0)
6372 iv_ca_delta_free (&best_delta
);
6373 best_delta
= act_delta
;
6376 iv_ca_delta_free (&act_delta
);
6385 /* Recurse to possibly remove other unnecessary ivs. */
6386 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6387 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6388 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
/* Chain this round's removal in front of the recursive deltas.  */
6389 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6393 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6394 cheaper local cost for USE than BEST_CP. Return pointer to
6395 the corresponding cost_pair, otherwise just return BEST_CP. */
6397 static struct cost_pair
*
6398 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6399 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6400 struct cost_pair
*best_cp
)
6402 struct iv_cand
*cand
;
6403 struct cost_pair
*cp
;
6405 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
/* The candidate being replaced is never a valid alternative.  */
6406 if (cand_idx
== old_cand
->id
)
6409 cand
= iv_cand (data
, cand_idx
);
6410 cp
= get_use_iv_cost (data
, use
, cand
);
6411 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
/* NOTE(review): decimated extract — brace structure and some statements
   are elided.  Code kept byte-identical; comments only.  */
6417 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6418 which are used by more than one iv uses. For each of those candidates,
6419 this function tries to represent iv uses under that candidate using
6420 other ones with lower local cost, then tries to prune the new set.
6421 If the new set has lower cost, It returns the new cost after recording
6422 candidate replacement in list DELTA. */
6425 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6426 struct iv_ca_delta
**delta
)
6428 bitmap_iterator bi
, bj
;
6429 unsigned int i
, j
, k
;
6431 struct iv_cand
*cand
;
6432 comp_cost orig_cost
, acost
;
6433 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6434 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6437 orig_cost
= iv_ca_cost (ivs
);
/* Only candidates serving several (but not too many) uses are worth the
   replacement attempt.  */
6439 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6441 if (ivs
->n_cand_uses
[i
] == 1
6442 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6445 cand
= iv_cand (data
, i
);
6448 /* Represent uses under current candidate using other ones with
6449 lower local cost. */
6450 for (j
= 0; j
< ivs
->upto
; j
++)
6452 use
= iv_use (data
, j
);
6453 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6455 if (old_cp
->cand
!= cand
)
6459 if (data
->consider_all_candidates
)
6460 for (k
= 0; k
< n_iv_cands (data
); k
++)
6461 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6462 old_cp
->cand
, best_cp
);
6464 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6465 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6466 old_cp
->cand
, best_cp
);
/* No cheaper local alternative for this use: give up on CAND.  */
6468 if (best_cp
== old_cp
)
6471 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6473 /* No need for further prune. */
6477 /* Prune the new candidate set. */
6478 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6479 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6480 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6481 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
/* Keep the combined replacement+prune delta only if it improves on the
   original cost.  */
6483 if (compare_costs (acost
, orig_cost
) < 0)
6489 iv_ca_delta_free (&act_delta
);
/* NOTE(review): decimated extract — brace structure and some statements
   are elided.  Code kept byte-identical; comments only.  */
6495 /* Tries to extend the sets IVS in the best possible way in order
6496 to express the USE. If ORIGINALP is true, prefer candidates from
6497 the original set of IVs, otherwise favor important candidates not
6498 based on any memory object. */
6501 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6502 struct iv_use
*use
, bool originalp
)
6504 comp_cost best_cost
, act_cost
;
6507 struct iv_cand
*cand
;
6508 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6509 struct cost_pair
*cp
;
/* Baseline: express USE with what is already in the set.  */
6511 iv_ca_add_use (data
, ivs
, use
);
6512 best_cost
= iv_ca_cost (ivs
);
6513 cp
= iv_ca_cand_for_use (ivs
, use
);
6516 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6517 iv_ca_set_no_cp (data
, ivs
, use
);
6520 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6521 first try important candidates not based on any memory object. Only if
6522 this fails, try the specific ones. Rationale -- in loops with many
6523 variables the best choice often is to use just one generic biv. If we
6524 added here many ivs specific to the uses, the optimization algorithm later
6525 would be likely to get stuck in a local minimum, thus causing us to create
6526 too many ivs. The approach from few ivs to more seems more likely to be
6527 successful -- starting from few ivs, replacing an expensive use by a
6528 specific iv should always be a win. */
6529 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6531 cand
= iv_cand (data
, i
);
6533 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6536 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
/* Candidates already in the set were covered by the baseline above.  */
6539 if (iv_ca_cand_used_p (ivs
, cand
))
6542 cp
= get_use_iv_cost (data
, use
, cand
);
/* Trial: assign CP, extend the set, measure, then roll back.  */
6546 iv_ca_set_cp (data
, ivs
, use
, cp
);
6547 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6549 iv_ca_set_no_cp (data
, ivs
, use
);
6550 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6552 if (compare_costs (act_cost
, best_cost
) < 0)
6554 best_cost
= act_cost
;
6556 iv_ca_delta_free (&best_delta
);
6557 best_delta
= act_delta
;
6560 iv_ca_delta_free (&act_delta
);
/* Fallback: if nothing finite was found, scan the use's whole cost map
   with the opposite preference filters.  */
6563 if (infinite_cost_p (best_cost
))
6565 for (i
= 0; i
< use
->n_map_members
; i
++)
6567 cp
= use
->cost_map
+ i
;
6572 /* Already tried this. */
6573 if (cand
->important
)
6575 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6577 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6581 if (iv_ca_cand_used_p (ivs
, cand
))
6585 iv_ca_set_cp (data
, ivs
, use
, cp
);
6586 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6587 iv_ca_set_no_cp (data
, ivs
, use
);
6588 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6591 if (compare_costs (act_cost
, best_cost
) < 0)
6593 best_cost
= act_cost
;
6596 iv_ca_delta_free (&best_delta
);
6597 best_delta
= act_delta
;
6600 iv_ca_delta_free (&act_delta
);
/* Commit the winning delta for real.  */
6604 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6605 iv_ca_delta_free (&best_delta
);
6607 return !infinite_cost_p (best_cost
);
6610 /* Finds an initial assignment of candidates to uses. */
6612 static struct iv_ca
*
6613 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6615 struct iv_ca
*ivs
= iv_ca_new (data
);
6618 for (i
= 0; i
< n_iv_uses (data
); i
++)
6619 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6628 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6629 points to a bool variable, this function tries to break local
6630 optimal fixed-point by replacing candidates in IVS if it's true. */
6633 try_improve_iv_set (struct ivopts_data
*data
,
6634 struct iv_ca
*ivs
, bool *try_replace_p
)
6637 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6638 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6639 struct iv_cand
*cand
;
6641 /* Try extending the set of induction variables by one. */
6642 for (i
= 0; i
< n_iv_cands (data
); i
++)
6644 cand
= iv_cand (data
, i
);
6646 if (iv_ca_cand_used_p (ivs
, cand
))
6649 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6653 /* If we successfully added the candidate and the set is small enough,
6654 try optimizing it by removing other candidates. */
6655 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6657 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6658 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6659 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6660 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6663 if (compare_costs (acost
, best_cost
) < 0)
6666 iv_ca_delta_free (&best_delta
);
6667 best_delta
= act_delta
;
6670 iv_ca_delta_free (&act_delta
);
6675 /* Try removing the candidates from the set instead. */
6676 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6678 if (!best_delta
&& *try_replace_p
)
6680 *try_replace_p
= false;
6681 /* So far candidate selecting algorithm tends to choose fewer IVs
6682 so that it can handle cases in which loops have many variables
6683 but the best choice is often to use only one general biv. One
6684 weakness is it can't handle opposite cases, in which different
6685 candidates should be chosen with respect to each use. To solve
6686 the problem, we replace candidates in a manner described by the
6687 comments of iv_ca_replace, thus give general algorithm a chance
6688 to break local optimal fixed-point in these cases. */
6689 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6696 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6697 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6698 iv_ca_delta_free (&best_delta
);
6702 /* Attempts to find the optimal set of induction variables. We do simple
6703 greedy heuristic -- we try to replace at most one candidate in the selected
6704 solution and remove the unused ivs while this improves the cost. */
6706 static struct iv_ca
*
6707 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6710 bool try_replace_p
= true;
6712 /* Get the initial solution. */
6713 set
= get_initial_solution (data
, originalp
);
6716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6717 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6723 fprintf (dump_file
, "Initial set of candidates:\n");
6724 iv_ca_dump (data
, dump_file
, set
);
6727 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6731 fprintf (dump_file
, "Improved to:\n");
6732 iv_ca_dump (data
, dump_file
, set
);
6739 static struct iv_ca
*
6740 find_optimal_iv_set (struct ivopts_data
*data
)
6743 struct iv_ca
*set
, *origset
;
6745 comp_cost cost
, origcost
;
6747 /* Determine the cost based on a strategy that starts with original IVs,
6748 and try again using a strategy that prefers candidates not based
6750 origset
= find_optimal_iv_set_1 (data
, true);
6751 set
= find_optimal_iv_set_1 (data
, false);
6753 if (!origset
&& !set
)
6756 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6757 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6759 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6761 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6762 origcost
.cost
, origcost
.complexity
);
6763 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6764 cost
.cost
, cost
.complexity
);
6767 /* Choose the one with the best cost. */
6768 if (compare_costs (origcost
, cost
) <= 0)
6775 iv_ca_free (&origset
);
6777 for (i
= 0; i
< n_iv_uses (data
); i
++)
6779 use
= iv_use (data
, i
);
6780 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6786 /* Creates a new induction variable corresponding to CAND. */
6789 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6791 gimple_stmt_iterator incr_pos
;
6801 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6805 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6813 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6817 /* Mark that the iv is preserved. */
6818 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6819 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6821 /* Rewrite the increment so that it uses var_before directly. */
6822 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6826 gimple_add_tmp_var (cand
->var_before
);
6828 base
= unshare_expr (cand
->iv
->base
);
6830 create_iv (base
, unshare_expr (cand
->iv
->step
),
6831 cand
->var_before
, data
->current_loop
,
6832 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6835 /* Creates new induction variables described in SET. */
6838 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6841 struct iv_cand
*cand
;
6844 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6846 cand
= iv_cand (data
, i
);
6847 create_new_iv (data
, cand
);
6850 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6852 fprintf (dump_file
, "Selected IV set for loop %d",
6853 data
->current_loop
->num
);
6854 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6855 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6856 LOCATION_LINE (data
->loop_loc
));
6857 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6858 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6860 cand
= iv_cand (data
, i
);
6861 dump_cand (dump_file
, cand
);
6863 fprintf (dump_file
, "\n");
6867 /* Rewrites USE (definition of iv used in a nonlinear expression)
6868 using candidate CAND. */
6871 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6872 struct iv_use
*use
, struct iv_cand
*cand
)
6877 gimple_stmt_iterator bsi
;
6879 /* An important special case -- if we are asked to express value of
6880 the original iv by itself, just exit; there is no need to
6881 introduce a new computation (that might also need casting the
6882 variable to unsigned and back). */
6883 if (cand
->pos
== IP_ORIGINAL
6884 && cand
->incremented_at
== use
->stmt
)
6886 enum tree_code stmt_code
;
6888 gcc_assert (is_gimple_assign (use
->stmt
));
6889 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6891 /* Check whether we may leave the computation unchanged.
6892 This is the case only if it does not rely on other
6893 computations in the loop -- otherwise, the computation
6894 we rely upon may be removed in remove_unused_ivs,
6895 thus leading to ICE. */
6896 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6897 if (stmt_code
== PLUS_EXPR
6898 || stmt_code
== MINUS_EXPR
6899 || stmt_code
== POINTER_PLUS_EXPR
)
6901 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6902 op
= gimple_assign_rhs2 (use
->stmt
);
6903 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6904 op
= gimple_assign_rhs1 (use
->stmt
);
6911 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6915 comp
= get_computation (data
->current_loop
, use
, cand
);
6916 gcc_assert (comp
!= NULL_TREE
);
6918 switch (gimple_code (use
->stmt
))
6921 tgt
= PHI_RESULT (use
->stmt
);
6923 /* If we should keep the biv, do not replace it. */
6924 if (name_info (data
, tgt
)->preserve_biv
)
6927 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6931 tgt
= gimple_assign_lhs (use
->stmt
);
6932 bsi
= gsi_for_stmt (use
->stmt
);
6939 if (!valid_gimple_rhs_p (comp
)
6940 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6941 /* We can't allow re-allocating the stmt as it might be pointed
6943 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6944 >= gimple_num_ops (gsi_stmt (bsi
)))))
6946 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6947 true, GSI_SAME_STMT
);
6948 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6950 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6951 /* As this isn't a plain copy we have to reset alignment
6953 if (SSA_NAME_PTR_INFO (comp
))
6954 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6958 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6960 ass
= gimple_build_assign (tgt
, comp
);
6961 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6963 bsi
= gsi_for_stmt (use
->stmt
);
6964 remove_phi_node (&bsi
, false);
6968 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6969 use
->stmt
= gsi_stmt (bsi
);
6973 /* Performs a peephole optimization to reorder the iv update statement with
6974 a mem ref to enable instruction combining in later phases. The mem ref uses
6975 the iv value before the update, so the reordering transformation requires
6976 adjustment of the offset. CAND is the selected IV_CAND.
6980 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6988 directly propagating t over to (1) will introduce overlapping live range
6989 thus increase register pressure. This peephole transform it into:
6993 t = MEM_REF (base, iv2, 8, 8);
7000 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7003 gimple
*iv_update
, *stmt
;
7005 gimple_stmt_iterator gsi
, gsi_iv
;
7007 if (cand
->pos
!= IP_NORMAL
)
7010 var_after
= cand
->var_after
;
7011 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7013 bb
= gimple_bb (iv_update
);
7014 gsi
= gsi_last_nondebug_bb (bb
);
7015 stmt
= gsi_stmt (gsi
);
7017 /* Only handle conditional statement for now. */
7018 if (gimple_code (stmt
) != GIMPLE_COND
)
7021 gsi_prev_nondebug (&gsi
);
7022 stmt
= gsi_stmt (gsi
);
7023 if (stmt
!= iv_update
)
7026 gsi_prev_nondebug (&gsi
);
7027 if (gsi_end_p (gsi
))
7030 stmt
= gsi_stmt (gsi
);
7031 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7034 if (stmt
!= use
->stmt
)
7037 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7040 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7042 fprintf (dump_file
, "Reordering \n");
7043 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
7044 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
7045 fprintf (dump_file
, "\n");
7048 gsi
= gsi_for_stmt (use
->stmt
);
7049 gsi_iv
= gsi_for_stmt (iv_update
);
7050 gsi_move_before (&gsi_iv
, &gsi
);
7052 cand
->pos
= IP_BEFORE_USE
;
7053 cand
->incremented_at
= use
->stmt
;
7056 /* Rewrites USE (address that is an iv) using candidate CAND. */
7059 rewrite_use_address_1 (struct ivopts_data
*data
,
7060 struct iv_use
*use
, struct iv_cand
*cand
)
7063 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7064 tree base_hint
= NULL_TREE
;
7068 adjust_iv_update_pos (cand
, use
);
7069 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
7071 unshare_aff_combination (&aff
);
7073 /* To avoid undefined overflow problems, all IV candidates use unsigned
7074 integer types. The drawback is that this makes it impossible for
7075 create_mem_ref to distinguish an IV that is based on a memory object
7076 from one that represents simply an offset.
7078 To work around this problem, we pass a hint to create_mem_ref that
7079 indicates which variable (if any) in aff is an IV based on a memory
7080 object. Note that we only consider the candidate. If this is not
7081 based on an object, the base of the reference is in some subexpression
7082 of the use -- but these will use pointer types, so they are recognized
7083 by the create_mem_ref heuristics anyway. */
7084 if (cand
->iv
->base_object
)
7085 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7087 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7088 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
7089 reference_alias_ptr_type (*use
->op_p
),
7090 iv
, base_hint
, data
->speed
);
7091 copy_ref_info (ref
, *use
->op_p
);
7095 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7096 first use of a group, rewrites sub uses in the group too. */
7099 rewrite_use_address (struct ivopts_data
*data
,
7100 struct iv_use
*use
, struct iv_cand
*cand
)
7102 struct iv_use
*next
;
7104 gcc_assert (use
->sub_id
== 0);
7105 rewrite_use_address_1 (data
, use
, cand
);
7106 update_stmt (use
->stmt
);
7108 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
7110 rewrite_use_address_1 (data
, next
, cand
);
7111 update_stmt (next
->stmt
);
7117 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7121 rewrite_use_compare (struct ivopts_data
*data
,
7122 struct iv_use
*use
, struct iv_cand
*cand
)
7124 tree comp
, *var_p
, op
, bound
;
7125 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7126 enum tree_code compare
;
7127 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
7133 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7134 tree var_type
= TREE_TYPE (var
);
7137 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7139 fprintf (dump_file
, "Replacing exit test: ");
7140 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7143 bound
= unshare_expr (fold_convert (var_type
, bound
));
7144 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7146 gsi_insert_seq_on_edge_immediate (
7147 loop_preheader_edge (data
->current_loop
),
7150 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7151 gimple_cond_set_lhs (cond_stmt
, var
);
7152 gimple_cond_set_code (cond_stmt
, compare
);
7153 gimple_cond_set_rhs (cond_stmt
, op
);
7157 /* The induction variable elimination failed; just express the original
7159 comp
= get_computation (data
->current_loop
, use
, cand
);
7160 gcc_assert (comp
!= NULL_TREE
);
7162 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7165 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7166 true, GSI_SAME_STMT
);
7169 /* Rewrites USE using candidate CAND. */
7172 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
7176 case USE_NONLINEAR_EXPR
:
7177 rewrite_use_nonlinear_expr (data
, use
, cand
);
7181 rewrite_use_address (data
, use
, cand
);
7185 rewrite_use_compare (data
, use
, cand
);
7192 update_stmt (use
->stmt
);
7195 /* Rewrite the uses using the selected induction variables. */
7198 rewrite_uses (struct ivopts_data
*data
)
7201 struct iv_cand
*cand
;
7204 for (i
= 0; i
< n_iv_uses (data
); i
++)
7206 use
= iv_use (data
, i
);
7207 cand
= use
->selected
;
7210 rewrite_use (data
, use
, cand
);
7214 /* Removes the ivs that are not used after rewriting. */
7217 remove_unused_ivs (struct ivopts_data
*data
)
7221 bitmap toremove
= BITMAP_ALLOC (NULL
);
7223 /* Figure out an order in which to release SSA DEFs so that we don't
7224 release something that we'd have to propagate into a debug stmt
7226 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7228 struct version_info
*info
;
7230 info
= ver_info (data
, j
);
7232 && !integer_zerop (info
->iv
->step
)
7234 && !info
->iv
->have_use_for
7235 && !info
->preserve_biv
)
7237 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7239 tree def
= info
->iv
->ssa_name
;
7241 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7243 imm_use_iterator imm_iter
;
7244 use_operand_p use_p
;
7248 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7250 if (!gimple_debug_bind_p (stmt
))
7253 /* We just want to determine whether to do nothing
7254 (count == 0), to substitute the computed
7255 expression into a single use of the SSA DEF by
7256 itself (count == 1), or to use a debug temp
7257 because the SSA DEF is used multiple times or as
7258 part of a larger expression (count > 1). */
7260 if (gimple_debug_bind_get_value (stmt
) != def
)
7264 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7270 struct iv_use dummy_use
;
7271 struct iv_cand
*best_cand
= NULL
, *cand
;
7272 unsigned i
, best_pref
= 0, cand_pref
;
7274 memset (&dummy_use
, 0, sizeof (dummy_use
));
7275 dummy_use
.iv
= info
->iv
;
7276 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7278 cand
= iv_use (data
, i
)->selected
;
7279 if (cand
== best_cand
)
7281 cand_pref
= operand_equal_p (cand
->iv
->step
,
7285 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7286 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7289 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7291 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7294 best_pref
= cand_pref
;
7301 tree comp
= get_computation_at (data
->current_loop
,
7302 &dummy_use
, best_cand
,
7303 SSA_NAME_DEF_STMT (def
));
7309 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7310 DECL_ARTIFICIAL (vexpr
) = 1;
7311 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7312 if (SSA_NAME_VAR (def
))
7313 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7315 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7317 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7318 gimple_stmt_iterator gsi
;
7320 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7321 gsi
= gsi_after_labels (gimple_bb
7322 (SSA_NAME_DEF_STMT (def
)));
7324 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7326 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7330 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7332 if (!gimple_debug_bind_p (stmt
))
7335 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7336 SET_USE (use_p
, comp
);
7344 release_defs_bitset (toremove
);
7346 BITMAP_FREE (toremove
);
7349 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7350 for hash_map::traverse. */
7353 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7359 /* Frees data allocated by the optimization of a single loop. */
7362 free_loop_data (struct ivopts_data
*data
)
7370 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7371 delete data
->niters
;
7372 data
->niters
= NULL
;
7375 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7377 struct version_info
*info
;
7379 info
= ver_info (data
, i
);
7381 info
->has_nonlin_use
= false;
7382 info
->preserve_biv
= false;
7385 bitmap_clear (data
->relevant
);
7386 bitmap_clear (data
->important_candidates
);
7388 for (i
= 0; i
< n_iv_uses (data
); i
++)
7390 struct iv_use
*use
= iv_use (data
, i
);
7391 struct iv_use
*pre
= use
, *sub
= use
->next
;
7395 gcc_assert (sub
->related_cands
== NULL
);
7396 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7403 BITMAP_FREE (use
->related_cands
);
7404 for (j
= 0; j
< use
->n_map_members
; j
++)
7405 if (use
->cost_map
[j
].depends_on
)
7406 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7407 free (use
->cost_map
);
7410 data
->iv_uses
.truncate (0);
7412 for (i
= 0; i
< n_iv_cands (data
); i
++)
7414 struct iv_cand
*cand
= iv_cand (data
, i
);
7416 if (cand
->depends_on
)
7417 BITMAP_FREE (cand
->depends_on
);
7420 data
->iv_candidates
.truncate (0);
7422 if (data
->version_info_size
< num_ssa_names
)
7424 data
->version_info_size
= 2 * num_ssa_names
;
7425 free (data
->version_info
);
7426 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7429 data
->max_inv_id
= 0;
7431 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7432 SET_DECL_RTL (obj
, NULL_RTX
);
7434 decl_rtl_to_reset
.truncate (0);
7436 data
->inv_expr_tab
->empty ();
7437 data
->inv_expr_id
= 0;
7440 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7444 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7446 free_loop_data (data
);
7447 free (data
->version_info
);
7448 BITMAP_FREE (data
->relevant
);
7449 BITMAP_FREE (data
->important_candidates
);
7451 decl_rtl_to_reset
.release ();
7452 data
->iv_uses
.release ();
7453 data
->iv_candidates
.release ();
7454 delete data
->inv_expr_tab
;
7455 data
->inv_expr_tab
= NULL
;
7456 free_affine_expand_cache (&data
->name_expansion_cache
);
7457 obstack_free (&data
->iv_obstack
, NULL
);
7460 /* Returns true if the loop body BODY includes any function calls. */
7463 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7465 gimple_stmt_iterator gsi
;
7468 for (i
= 0; i
< num_nodes
; i
++)
7469 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7471 gimple
*stmt
= gsi_stmt (gsi
);
7472 if (is_gimple_call (stmt
)
7473 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7479 /* Optimizes the LOOP. Returns true if anything changed. */
7482 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7484 bool changed
= false;
7485 struct iv_ca
*iv_ca
;
7486 edge exit
= single_dom_exit (loop
);
7489 gcc_assert (!data
->niters
);
7490 data
->current_loop
= loop
;
7491 data
->loop_loc
= find_loop_location (loop
);
7492 data
->speed
= optimize_loop_for_speed_p (loop
);
7494 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7496 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7497 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7498 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7499 LOCATION_LINE (data
->loop_loc
));
7500 fprintf (dump_file
, "\n");
7504 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7505 exit
->src
->index
, exit
->dest
->index
);
7506 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7507 fprintf (dump_file
, "\n");
7510 fprintf (dump_file
, "\n");
7513 body
= get_loop_body (loop
);
7514 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7515 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7518 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7520 /* For each ssa name determines whether it behaves as an induction variable
7522 if (!find_induction_variables (data
))
7525 /* Finds interesting uses (item 1). */
7526 find_interesting_uses (data
);
7527 group_address_uses (data
);
7528 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7531 /* Finds candidates for the induction variables (item 2). */
7532 find_iv_candidates (data
);
7534 /* Calculates the costs (item 3, part 1). */
7535 determine_iv_costs (data
);
7536 determine_use_iv_costs (data
);
7537 determine_set_costs (data
);
7539 /* Find the optimal set of induction variables (item 3, part 2). */
7540 iv_ca
= find_optimal_iv_set (data
);
7545 /* Create the new induction variables (item 4, part 1). */
7546 create_new_ivs (data
, iv_ca
);
7547 iv_ca_free (&iv_ca
);
7549 /* Rewrite the uses (item 4, part 2). */
7550 rewrite_uses (data
);
7552 /* Remove the ivs that are unused after rewriting. */
7553 remove_unused_ivs (data
);
7555 /* We have changed the structure of induction variables; it might happen
7556 that definitions in the scev database refer to some of them that were
7561 free_loop_data (data
);
7566 /* Main entry point. Optimizes induction variables in loops. */
7569 tree_ssa_iv_optimize (void)
7572 struct ivopts_data data
;
7574 tree_ssa_iv_optimize_init (&data
);
7576 /* Optimize the loops starting with the innermost ones. */
7577 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7579 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7580 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7582 tree_ssa_iv_optimize_loop (&data
, loop
);
7585 tree_ssa_iv_optimize_finalize (&data
);