1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
72 #include "fold-const.h"
73 #include "stor-layout.h"
76 #include "hard-reg-set.h"
78 #include "dominance.h"
80 #include "basic-block.h"
81 #include "gimple-pretty-print.h"
82 #include "tree-ssa-alias.h"
83 #include "internal-fn.h"
85 #include "gimple-expr.h"
89 #include "gimple-iterator.h"
90 #include "gimplify-me.h"
91 #include "gimple-ssa.h"
92 #include "plugin-api.h"
96 #include "tree-phinodes.h"
97 #include "ssa-iterators.h"
98 #include "stringpool.h"
99 #include "tree-ssanames.h"
100 #include "tree-ssa-loop-ivopts.h"
101 #include "tree-ssa-loop-manip.h"
102 #include "tree-ssa-loop-niter.h"
103 #include "tree-ssa-loop.h"
106 #include "insn-config.h"
111 #include "emit-rtl.h"
115 #include "tree-dfa.h"
116 #include "tree-ssa.h"
118 #include "tree-pass.h"
119 #include "tree-chrec.h"
120 #include "tree-scalar-evolution.h"
122 #include "langhooks.h"
123 #include "tree-affine.h"
125 #include "tree-inline.h"
126 #include "tree-ssa-propagate.h"
127 #include "tree-ssa-address.h"
128 #include "builtins.h"
129 #include "tree-vectorizer.h"
131 /* FIXME: Expressions are expanded to RTL in this pass to determine the
132 cost of different addressing modes. This should be moved to a TBD
133 interface between the GIMPLE and RTL worlds. */
136 /* The infinite cost. */
137 #define INFTY 10000000
139 #define AVG_LOOP_NITER(LOOP) 5
141 /* Returns the expected number of loop iterations for LOOP.
142 The average trip count is computed from profile data if it
145 static inline HOST_WIDE_INT
146 avg_loop_niter (struct loop
*loop
)
148 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
150 return AVG_LOOP_NITER (loop
);
155 /* Representation of the induction variable. */
158 tree base
; /* Initial value of the iv. */
159 tree base_object
; /* A memory object to that the induction variable points. */
160 tree step
; /* Step of the iv (constant only). */
161 tree ssa_name
; /* The ssa name with the value. */
162 unsigned use_id
; /* The identifier in the use if it is the case. */
163 bool biv_p
; /* Is it a biv? */
164 bool have_use_for
; /* Do we already have a use for it? */
165 bool no_overflow
; /* True if the iv doesn't overflow. */
168 /* Per-ssa version information (induction variable descriptions, etc.). */
171 tree name
; /* The ssa name. */
172 struct iv
*iv
; /* Induction variable description. */
173 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
174 an expression that is not an induction variable. */
175 bool preserve_biv
; /* For the original biv, whether to preserve it. */
176 unsigned inv_id
; /* Id of an invariant. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
187 /* Cost of a computation. */
190 int cost
; /* The runtime cost. */
191 unsigned complexity
; /* The estimate of the complexity of the code for
192 the computation (in no concrete units --
193 complexity field should be larger for more
194 complex expressions and addressing modes). */
197 static const comp_cost no_cost
= {0, 0};
198 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
200 /* The candidate - cost pair. */
203 struct iv_cand
*cand
; /* The candidate. */
204 comp_cost cost
; /* The cost. */
205 bitmap depends_on
; /* The list of invariants that have to be
207 tree value
; /* For final value elimination, the expression for
208 the final value of the iv. For iv elimination,
209 the new bound to compare with. */
210 enum tree_code comp
; /* For iv elimination, the comparison. */
211 int inv_expr_id
; /* Loop invariant expression id. */
217 unsigned id
; /* The id of the use. */
218 unsigned sub_id
; /* The id of the sub use. */
219 enum use_type type
; /* Type of the use. */
220 struct iv
*iv
; /* The induction variable it is based on. */
221 gimple stmt
; /* Statement in that it occurs. */
222 tree
*op_p
; /* The place where it occurs. */
223 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
226 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
227 struct cost_pair
*cost_map
;
228 /* The costs wrto the iv candidates. */
230 struct iv_cand
*selected
;
231 /* The selected candidate. */
233 struct iv_use
*next
; /* The next sub use. */
234 tree addr_base
; /* Base address with const offset stripped. */
235 unsigned HOST_WIDE_INT addr_offset
;
236 /* Const offset stripped from base address. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
249 /* The induction variable candidate. */
252 unsigned id
; /* The number of the candidate. */
253 bool important
; /* Whether this is an "important" candidate, i.e. such
254 that it should be considered by all uses. */
255 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
256 gimple incremented_at
;/* For original biv, the statement where it is
258 tree var_before
; /* The variable used for it before increment. */
259 tree var_after
; /* The variable used for it after increment. */
260 struct iv
*iv
; /* The value of the candidate. NULL for
261 "pseudocandidate" used to indicate the possibility
262 to replace the final value of an iv by direct
263 computation of the value. */
264 unsigned cost
; /* Cost of the candidate. */
265 unsigned cost_step
; /* Cost of the candidate's increment operation. */
266 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
267 where it is incremented. */
268 bitmap depends_on
; /* The list of invariants that are used in step of the
272 /* Loop invariant expression hashtable entry. */
273 struct iv_inv_expr_ent
280 /* The data used by the induction variable optimizations. */
282 typedef struct iv_use
*iv_use_p
;
284 typedef struct iv_cand
*iv_cand_p
;
286 /* Hashtable helpers. */
288 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
290 typedef iv_inv_expr_ent
*value_type
;
291 typedef iv_inv_expr_ent
*compare_type
;
292 static inline hashval_t
hash (const iv_inv_expr_ent
*);
293 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
296 /* Hash function for loop invariant expressions. */
299 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
304 /* Hash table equality function for expressions. */
307 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
308 const iv_inv_expr_ent
*expr2
)
310 return expr1
->hash
== expr2
->hash
311 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
316 /* The currently optimized loop. */
317 struct loop
*current_loop
;
318 source_location loop_loc
;
320 /* Numbers of iterations for all exits of the current loop. */
321 hash_map
<edge
, tree_niter_desc
*> *niters
;
323 /* Number of registers used in it. */
326 /* The size of version_info array allocated. */
327 unsigned version_info_size
;
329 /* The array of information for the ssa names. */
330 struct version_info
*version_info
;
332 /* The hashtable of loop invariant expressions created
334 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
336 /* Loop invariant expression id. */
339 /* The bitmap of indices in version_info whose value was changed. */
342 /* The uses of induction variables. */
343 vec
<iv_use_p
> iv_uses
;
345 /* The candidates. */
346 vec
<iv_cand_p
> iv_candidates
;
348 /* A bitmap of important candidates. */
349 bitmap important_candidates
;
351 /* Cache used by tree_to_aff_combination_expand. */
352 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
354 /* The maximum invariant id. */
357 /* Whether to consider just related and important candidates when replacing a
359 bool consider_all_candidates
;
361 /* Are we optimizing for speed? */
364 /* Whether the loop body includes any function calls. */
365 bool body_includes_call
;
367 /* Whether the loop body can only be exited via single exit. */
368 bool loop_single_exit_p
;
371 /* An assignment of iv candidates to uses. */
375 /* The number of uses covered by the assignment. */
378 /* Number of uses that cannot be expressed by the candidates in the set. */
381 /* Candidate assigned to a use, together with the related costs. */
382 struct cost_pair
**cand_for_use
;
384 /* Number of times each candidate is used. */
385 unsigned *n_cand_uses
;
387 /* The candidates used. */
390 /* The number of candidates in the set. */
393 /* Total number of registers needed. */
396 /* Total cost of expressing uses. */
397 comp_cost cand_use_cost
;
399 /* Total cost of candidates. */
402 /* Number of times each invariant is used. */
403 unsigned *n_invariant_uses
;
405 /* The array holding the number of uses of each loop
406 invariant expressions created by ivopt. */
407 unsigned *used_inv_expr
;
409 /* The number of created loop invariants. */
410 unsigned num_used_inv_expr
;
412 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
433 /* Bound on number of candidates below that all candidates are considered. */
435 #define CONSIDER_ALL_CANDIDATES_BOUND \
436 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
438 /* If there are more iv occurrences, we just give up (it is quite unlikely that
439 optimizing such a loop would help, and it would take ages). */
441 #define MAX_CONSIDERED_USES \
442 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
444 /* If there are at most this number of ivs in the set, try removing unnecessary
445 ivs from the set always. */
447 #define ALWAYS_PRUNE_CAND_SET_BOUND \
448 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
450 /* The list of trees for that the decl_rtl field must be reset is stored
453 static vec
<tree
> decl_rtl_to_reset
;
455 static comp_cost
force_expr_to_var_cost (tree
, bool);
457 /* Number of uses recorded in DATA. */
459 static inline unsigned
460 n_iv_uses (struct ivopts_data
*data
)
462 return data
->iv_uses
.length ();
465 /* Ith use recorded in DATA. */
467 static inline struct iv_use
*
468 iv_use (struct ivopts_data
*data
, unsigned i
)
470 return data
->iv_uses
[i
];
473 /* Number of candidates recorded in DATA. */
475 static inline unsigned
476 n_iv_cands (struct ivopts_data
*data
)
478 return data
->iv_candidates
.length ();
481 /* Ith candidate recorded in DATA. */
483 static inline struct iv_cand
*
484 iv_cand (struct ivopts_data
*data
, unsigned i
)
486 return data
->iv_candidates
[i
];
489 /* The single loop exit if it dominates the latch, NULL otherwise. */
492 single_dom_exit (struct loop
*loop
)
494 edge exit
= single_exit (loop
);
499 if (!just_once_each_iteration_p (loop
, exit
->src
))
505 /* Dumps information about the induction variable IV to FILE. */
508 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
510 if (iv
->ssa_name
&& dump_name
)
512 fprintf (file
, "ssa name ");
513 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
514 fprintf (file
, "\n");
517 fprintf (file
, " type ");
518 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
519 fprintf (file
, "\n");
523 fprintf (file
, " base ");
524 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
525 fprintf (file
, "\n");
527 fprintf (file
, " step ");
528 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
529 fprintf (file
, "\n");
533 fprintf (file
, " invariant ");
534 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
535 fprintf (file
, "\n");
540 fprintf (file
, " base object ");
541 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
542 fprintf (file
, "\n");
546 fprintf (file
, " is a biv\n");
549 /* Dumps information about the USE to FILE. */
552 dump_use (FILE *file
, struct iv_use
*use
)
554 fprintf (file
, "use %d", use
->id
);
556 fprintf (file
, ".%d", use
->sub_id
);
558 fprintf (file
, "\n");
562 case USE_NONLINEAR_EXPR
:
563 fprintf (file
, " generic\n");
567 fprintf (file
, " address\n");
571 fprintf (file
, " compare\n");
578 fprintf (file
, " in statement ");
579 print_gimple_stmt (file
, use
->stmt
, 0, 0);
580 fprintf (file
, "\n");
582 fprintf (file
, " at position ");
584 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
585 fprintf (file
, "\n");
587 dump_iv (file
, use
->iv
, false);
589 if (use
->related_cands
)
591 fprintf (file
, " related candidates ");
592 dump_bitmap (file
, use
->related_cands
);
596 /* Dumps information about the uses to FILE. */
599 dump_uses (FILE *file
, struct ivopts_data
*data
)
604 for (i
= 0; i
< n_iv_uses (data
); i
++)
606 use
= iv_use (data
, i
);
609 dump_use (file
, use
);
613 fprintf (file
, "\n");
617 /* Dumps information about induction variable candidate CAND to FILE. */
620 dump_cand (FILE *file
, struct iv_cand
*cand
)
622 struct iv
*iv
= cand
->iv
;
624 fprintf (file
, "candidate %d%s\n",
625 cand
->id
, cand
->important
? " (important)" : "");
627 if (cand
->depends_on
)
629 fprintf (file
, " depends on ");
630 dump_bitmap (file
, cand
->depends_on
);
635 fprintf (file
, " final value replacement\n");
639 if (cand
->var_before
)
641 fprintf (file
, " var_before ");
642 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
643 fprintf (file
, "\n");
647 fprintf (file
, " var_after ");
648 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
649 fprintf (file
, "\n");
655 fprintf (file
, " incremented before exit test\n");
659 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
663 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
667 fprintf (file
, " incremented at end\n");
671 fprintf (file
, " original biv\n");
675 dump_iv (file
, iv
, false);
678 /* Returns the info for ssa version VER. */
680 static inline struct version_info
*
681 ver_info (struct ivopts_data
*data
, unsigned ver
)
683 return data
->version_info
+ ver
;
686 /* Returns the info for ssa name NAME. */
688 static inline struct version_info
*
689 name_info (struct ivopts_data
*data
, tree name
)
691 return ver_info (data
, SSA_NAME_VERSION (name
));
694 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
698 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
700 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
704 if (sbb
== loop
->latch
)
710 return stmt
== last_stmt (bb
);
713 /* Returns true if STMT if after the place where the original induction
714 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
715 if the positions are identical. */
718 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
720 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
721 basic_block stmt_bb
= gimple_bb (stmt
);
723 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
726 if (stmt_bb
!= cand_bb
)
730 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
732 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
735 /* Returns true if STMT if after the place where the induction variable
736 CAND is incremented in LOOP. */
739 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
747 return stmt_after_ip_normal_pos (loop
, stmt
);
751 return stmt_after_inc_pos (cand
, stmt
, false);
754 return stmt_after_inc_pos (cand
, stmt
, true);
761 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
764 abnormal_ssa_name_p (tree exp
)
769 if (TREE_CODE (exp
) != SSA_NAME
)
772 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
775 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
776 abnormal phi node. Callback for for_each_index. */
779 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
780 void *data ATTRIBUTE_UNUSED
)
782 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
784 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
786 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
790 return !abnormal_ssa_name_p (*index
);
793 /* Returns true if EXPR contains a ssa name that occurs in an
794 abnormal phi node. */
797 contains_abnormal_ssa_name_p (tree expr
)
800 enum tree_code_class codeclass
;
805 code
= TREE_CODE (expr
);
806 codeclass
= TREE_CODE_CLASS (code
);
808 if (code
== SSA_NAME
)
809 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
811 if (code
== INTEGER_CST
812 || is_gimple_min_invariant (expr
))
815 if (code
== ADDR_EXPR
)
816 return !for_each_index (&TREE_OPERAND (expr
, 0),
817 idx_contains_abnormal_ssa_name_p
,
820 if (code
== COND_EXPR
)
821 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
822 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
823 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
829 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
834 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
846 /* Returns the structure describing number of iterations determined from
847 EXIT of DATA->current_loop, or NULL if something goes wrong. */
849 static struct tree_niter_desc
*
850 niter_for_exit (struct ivopts_data
*data
, edge exit
)
852 struct tree_niter_desc
*desc
;
853 tree_niter_desc
**slot
;
857 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
861 slot
= data
->niters
->get (exit
);
865 /* Try to determine number of iterations. We cannot safely work with ssa
866 names that appear in phi nodes on abnormal edges, so that we do not
867 create overlapping life ranges for them (PR 27283). */
868 desc
= XNEW (struct tree_niter_desc
);
869 if (!number_of_iterations_exit (data
->current_loop
,
871 || contains_abnormal_ssa_name_p (desc
->niter
))
876 data
->niters
->put (exit
, desc
);
884 /* Returns the structure describing number of iterations determined from
885 single dominating exit of DATA->current_loop, or NULL if something
888 static struct tree_niter_desc
*
889 niter_for_single_dom_exit (struct ivopts_data
*data
)
891 edge exit
= single_dom_exit (data
->current_loop
);
896 return niter_for_exit (data
, exit
);
899 /* Initializes data structures used by the iv optimization pass, stored
903 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
905 data
->version_info_size
= 2 * num_ssa_names
;
906 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
907 data
->relevant
= BITMAP_ALLOC (NULL
);
908 data
->important_candidates
= BITMAP_ALLOC (NULL
);
909 data
->max_inv_id
= 0;
911 data
->iv_uses
.create (20);
912 data
->iv_candidates
.create (20);
913 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
914 data
->inv_expr_id
= 0;
915 data
->name_expansion_cache
= NULL
;
916 decl_rtl_to_reset
.create (20);
919 /* Returns a memory object to that EXPR points. In case we are able to
920 determine that it does not point to any such object, NULL is returned. */
923 determine_base_object (tree expr
)
925 enum tree_code code
= TREE_CODE (expr
);
928 /* If this is a pointer casted to any type, we need to determine
929 the base object for the pointer; so handle conversions before
930 throwing away non-pointer expressions. */
931 if (CONVERT_EXPR_P (expr
))
932 return determine_base_object (TREE_OPERAND (expr
, 0));
934 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
943 obj
= TREE_OPERAND (expr
, 0);
944 base
= get_base_address (obj
);
949 if (TREE_CODE (base
) == MEM_REF
)
950 return determine_base_object (TREE_OPERAND (base
, 0));
952 return fold_convert (ptr_type_node
,
953 build_fold_addr_expr (base
));
955 case POINTER_PLUS_EXPR
:
956 return determine_base_object (TREE_OPERAND (expr
, 0));
960 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
964 return fold_convert (ptr_type_node
, expr
);
968 /* Return true if address expression with non-DECL_P operand appears
972 contain_complex_addr_expr (tree expr
)
977 switch (TREE_CODE (expr
))
979 case POINTER_PLUS_EXPR
:
982 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
983 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
987 return (!DECL_P (TREE_OPERAND (expr
, 0)));
996 /* Allocates an induction variable with given initial value BASE and step STEP
997 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1000 alloc_iv (tree base
, tree step
, bool no_overflow
= false)
1003 struct iv
*iv
= XCNEW (struct iv
);
1004 gcc_assert (step
!= NULL_TREE
);
1006 /* Lower address expression in base except ones with DECL_P as operand.
1008 1) More accurate cost can be computed for address expressions;
1009 2) Duplicate candidates won't be created for bases in different
1010 forms, like &a[0] and &a. */
1012 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1013 || contain_complex_addr_expr (expr
))
1016 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1017 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1021 iv
->base_object
= determine_base_object (base
);
1024 iv
->have_use_for
= false;
1026 iv
->ssa_name
= NULL_TREE
;
1027 iv
->no_overflow
= no_overflow
;
1032 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1033 doesn't overflow. */
1036 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1039 struct version_info
*info
= name_info (data
, iv
);
1041 gcc_assert (!info
->iv
);
1043 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1044 info
->iv
= alloc_iv (base
, step
, no_overflow
);
1045 info
->iv
->ssa_name
= iv
;
1048 /* Finds induction variable declaration for VAR. */
1051 get_iv (struct ivopts_data
*data
, tree var
)
1054 tree type
= TREE_TYPE (var
);
1056 if (!POINTER_TYPE_P (type
)
1057 && !INTEGRAL_TYPE_P (type
))
1060 if (!name_info (data
, var
)->iv
)
1062 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1065 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1066 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1069 return name_info (data
, var
)->iv
;
1072 /* Return the first non-invariant ssa var found in EXPR. */
1075 extract_single_var_from_expr (tree expr
)
1079 enum tree_code code
;
1081 if (!expr
|| is_gimple_min_invariant (expr
))
1084 code
= TREE_CODE (expr
);
1085 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1087 n
= TREE_OPERAND_LENGTH (expr
);
1088 for (i
= 0; i
< n
; i
++)
1090 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1096 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1099 /* Finds basic ivs. */
1102 find_bivs (struct ivopts_data
*data
)
1106 tree step
, type
, base
, stop
;
1108 struct loop
*loop
= data
->current_loop
;
1111 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1115 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1118 if (virtual_operand_p (PHI_RESULT (phi
)))
1121 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1124 if (integer_zerop (iv
.step
))
1128 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1129 /* Stop expanding iv base at the first ssa var referred by iv step.
1130 Ideally we should stop at any ssa var, because that's expensive
1131 and unusual to happen, we just do it on the first one.
1133 See PR64705 for the rationale. */
1134 stop
= extract_single_var_from_expr (step
);
1135 base
= expand_simple_operations (base
, stop
);
1136 if (contains_abnormal_ssa_name_p (base
)
1137 || contains_abnormal_ssa_name_p (step
))
1140 type
= TREE_TYPE (PHI_RESULT (phi
));
1141 base
= fold_convert (type
, base
);
1144 if (POINTER_TYPE_P (type
))
1145 step
= convert_to_ptrofftype (step
);
1147 step
= fold_convert (type
, step
);
1150 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1157 /* Marks basic ivs. */
1160 mark_bivs (struct ivopts_data
*data
)
1165 struct iv
*iv
, *incr_iv
;
1166 struct loop
*loop
= data
->current_loop
;
1167 basic_block incr_bb
;
1170 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1174 iv
= get_iv (data
, PHI_RESULT (phi
));
1178 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1179 def
= SSA_NAME_DEF_STMT (var
);
1180 /* Don't mark iv peeled from other one as biv. */
1182 && gimple_code (def
) == GIMPLE_PHI
1183 && gimple_bb (def
) == loop
->header
)
1186 incr_iv
= get_iv (data
, var
);
1190 /* If the increment is in the subloop, ignore it. */
1191 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1192 if (incr_bb
->loop_father
!= data
->current_loop
1193 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1197 incr_iv
->biv_p
= true;
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202 parameters to IV. */
1205 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1208 struct loop
*loop
= data
->current_loop
;
1210 iv
->base
= NULL_TREE
;
1211 iv
->step
= NULL_TREE
;
1213 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1216 lhs
= gimple_assign_lhs (stmt
);
1217 if (TREE_CODE (lhs
) != SSA_NAME
)
1220 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1223 /* Stop expanding iv base at the first ssa var referred by iv step.
1224 Ideally we should stop at any ssa var, because that's expensive
1225 and unusual to happen, we just do it on the first one.
1227 See PR64705 for the rationale. */
1228 stop
= extract_single_var_from_expr (iv
->step
);
1229 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1230 if (contains_abnormal_ssa_name_p (iv
->base
)
1231 || contains_abnormal_ssa_name_p (iv
->step
))
1234 /* If STMT could throw, then do not consider STMT as defining a GIV.
1235 While this will suppress optimizations, we can not safely delete this
1236 GIV and associated statements, even if it appears it is not used. */
1237 if (stmt_could_throw_p (stmt
))
1243 /* Finds general ivs in statement STMT. */
1246 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1250 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1253 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1256 /* Finds general ivs in basic block BB. */
1259 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1261 gimple_stmt_iterator bsi
;
1263 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1264 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1267 /* Finds general ivs. */
1270 find_givs (struct ivopts_data
*data
)
1272 struct loop
*loop
= data
->current_loop
;
1273 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1276 for (i
= 0; i
< loop
->num_nodes
; i
++)
1277 find_givs_in_bb (data
, body
[i
]);
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282 variable and if so, its initial value and step. */
1285 find_induction_variables (struct ivopts_data
*data
)
1290 if (!find_bivs (data
))
1296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1298 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1302 fprintf (dump_file
, " number of iterations ");
1303 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1304 if (!integer_zerop (niter
->may_be_zero
))
1306 fprintf (dump_file
, "; zero if ");
1307 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1309 fprintf (dump_file
, "\n\n");
1312 fprintf (dump_file
, "Induction variables:\n\n");
1314 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1316 if (ver_info (data
, i
)->iv
)
1317 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326 is the const offset stripped from IV base. For uses of other types,
1327 ADDR_BASE and ADDR_OFFSET are zero by default. */
1329 static struct iv_use
*
1330 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1331 gimple stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1332 unsigned HOST_WIDE_INT addr_offset
= 0)
1334 struct iv_use
*use
= XCNEW (struct iv_use
);
1336 use
->id
= n_iv_uses (data
);
1338 use
->type
= use_type
;
1342 use
->related_cands
= BITMAP_ALLOC (NULL
);
1344 use
->addr_base
= addr_base
;
1345 use
->addr_offset
= addr_offset
;
1347 data
->iv_uses
.safe_push (use
);
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353 The sub use is recorded under the one whose use id is ID_GROUP. */
1355 static struct iv_use
*
1356 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1357 struct iv
*iv
, gimple stmt
, enum use_type use_type
,
1358 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1359 unsigned int id_group
)
1361 struct iv_use
*use
= XCNEW (struct iv_use
);
1362 struct iv_use
*group
= iv_use (data
, id_group
);
1364 use
->id
= group
->id
;
1366 use
->type
= use_type
;
1370 use
->related_cands
= NULL
;
1371 use
->addr_base
= addr_base
;
1372 use
->addr_offset
= addr_offset
;
1374 /* Sub use list is maintained in offset ascending order. */
1375 if (addr_offset
<= group
->addr_offset
)
1377 use
->related_cands
= group
->related_cands
;
1378 group
->related_cands
= NULL
;
1380 data
->iv_uses
[id_group
] = use
;
1388 group
= group
->next
;
1390 while (group
&& addr_offset
> group
->addr_offset
);
1391 use
->next
= pre
->next
;
1395 /* To avoid showing ssa name in the dumps, if it was not reset by the
1397 iv
->ssa_name
= NULL_TREE
;
1402 /* Checks whether OP is a loop-level invariant and if so, records it.
1403 NONLINEAR_USE is true if the invariant is used in a way we do not
1404 handle specially. */
/* Records OP as a loop-level invariant if it is an SSA name defined
   outside data->current_loop.  NONLINEAR_USE is OR-ed into the
   version_info flag, marking invariants used in ways the pass does not
   handle specially.  NOTE(review): interior lines (early returns for
   the in-loop case, braces) were dropped by extraction; tokens below
   are byte-identical to the original.  */
1407 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1410 struct version_info
*info
;
/* Only real (non-virtual) SSA names can be invariants of interest.  */
1412 if (TREE_CODE (op
) != SSA_NAME
1413 || virtual_operand_p (op
))
1416 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
/* Defined inside the current loop => not a loop invariant
   (condition's first operand lost in extraction — presumably
   "if (bb &&"; TODO confirm against upstream).  */
1418 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1421 info
= name_info (data
, op
);
/* Sticky: once any nonlinear use is seen, the flag stays set.  */
1423 info
->has_nonlin_use
|= nonlinear_use
;
/* Assign a fresh invariant id on first sight.  */
1425 info
->inv_id
= ++data
->max_inv_id
;
/* Mark this SSA version as relevant to the pass.  */
1426 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1429 /* Checks whether the use OP is interesting and if so, records it. */
/* Checks whether the use OP is an interesting induction-variable use
   and if so records it as a USE_NONLINEAR_EXPR use, returning the
   iv_use (or reusing an existing one via iv->use_id).  A zero-step iv
   is recorded as an invariant instead.  NOTE(review): extraction
   dropped interior lines (returns, braces, civ initialization); code
   tokens below are byte-identical to the original.  */
1431 static struct iv_use
*
1432 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
/* Only SSA names can carry an iv description.  */
1439 if (TREE_CODE (op
) != SSA_NAME
)
1442 iv
= get_iv (data
, op
);
/* Already recorded: return the previously created use.  */
1446 if (iv
->have_use_for
)
1448 use
= iv_use (data
, iv
->use_id
)
;
1450 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
/* A zero-step "iv" is really an invariant; record it as such
   (nonlinear use = true).  */
1454 if (integer_zerop (iv
->step
))
1456 record_invariant (data
, op
, true);
/* Remember we recorded a use, so subsequent calls reuse it.  */
1459 iv
->have_use_for
= true;
/* Private copy of the iv description for the use record
   (initialization of *civ lost in extraction).  */
1461 civ
= XNEW (struct iv
);
1464 stmt
= SSA_NAME_DEF_STMT (op
);
/* Interesting definitions are PHIs or assignments only.  */
1465 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1466 || is_gimple_assign (stmt
));
1468 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
/* Link the iv back to its use for the have_use_for fast path.  */
1469 iv
->use_id
= use
->id
;
1474 /* Given a condition in statement STMT, checks whether it is a compare
1475 of an induction variable and an invariant. If this is the case,
1476 CONTROL_VAR is set to location of the iv, BOUND to the location of
1477 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1478 induction variable descriptions, and true is returned. If this is not
1479 the case, CONTROL_VAR and BOUND are set to the arguments of the
1480 condition and false is returned. */
1483 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1484 tree
**control_var
, tree
**bound
,
1485 struct iv
**iv_var
, struct iv
**iv_bound
)
1487 /* The objects returned when COND has constant operands. */
1488 static struct iv const_iv
;
1490 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1491 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1494 if (gimple_code (stmt
) == GIMPLE_COND
)
1496 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1497 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1498 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1502 op0
= gimple_assign_rhs1_ptr (stmt
);
1503 op1
= gimple_assign_rhs2_ptr (stmt
);
1506 zero
= integer_zero_node
;
1507 const_iv
.step
= integer_zero_node
;
1509 if (TREE_CODE (*op0
) == SSA_NAME
)
1510 iv0
= get_iv (data
, *op0
);
1511 if (TREE_CODE (*op1
) == SSA_NAME
)
1512 iv1
= get_iv (data
, *op1
);
1514 /* Exactly one of the compared values must be an iv, and the other one must
1519 if (integer_zerop (iv0
->step
))
1521 /* Control variable may be on the other side. */
1522 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1523 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1525 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1540 /* Checks whether the condition in STMT is interesting and if so,
/* Checks whether the condition in STMT is an interesting iv-vs-invariant
   compare; if not, records both operands as ordinary uses, otherwise
   records the whole condition as a USE_COMPARE use.  NOTE(review):
   extraction dropped the early-return after the operand recording and
   the *civ initialization; tokens below are byte-identical.  */
1544 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1546 tree
*var_p
, *bound_p
;
1547 struct iv
*var_iv
, *civ
;
/* Not an iv-vs-invariant compare: treat each operand on its own.  */
1549 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1551 find_interesting_uses_op (data
, *var_p
);
1552 find_interesting_uses_op (data
, *bound_p
);
/* Private iv copy for the compare use (its initialization from
   var_iv was lost in extraction — TODO confirm upstream).  */
1556 civ
= XNEW (struct iv
);
1558 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1561 /* Returns the outermost loop EXPR is obviously invariant in
1562 relative to the loop LOOP, i.e. if all its operands are defined
1563 outside of the returned loop. Returns NULL if EXPR is not
1564 even obviously invariant in LOOP. */
/* Returns the outermost loop in which EXPR is obviously invariant
   relative to LOOP (all operands defined outside the returned loop);
   returns NULL if EXPR is not even invariant in LOOP (the NULL-return
   branches were dropped by extraction).  Constants are invariant
   everywhere, hence tree_root.  Tokens below are byte-identical.  */
1567 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
/* Gimple-minimal invariants are invariant in the whole function.  */
1572 if (is_gimple_min_invariant (expr
))
1573 return current_loops
->tree_root
;
1575 if (TREE_CODE (expr
) == SSA_NAME
)
1577 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
/* Defined inside LOOP: invariant only in subloops strictly inside
   the definition's loop nest level.  */
1580 if (flow_bb_inside_loop_p (loop
, def_bb
))
1582 return superloop_at_depth (loop
,
1583 loop_depth (def_bb
->loop_father
) + 1);
/* SSA name defined outside LOOP (or with no def bb): invariant in
   the whole function.  */
1586 return current_loops
->tree_root
;
/* Compound expression: recurse over operands and take the innermost
   (deepest) level any operand requires.  */
1592 unsigned maxdepth
= 0;
1593 len
= TREE_OPERAND_LENGTH (expr
);
1594 for (i
= 0; i
< len
; i
++)
1596 struct loop
*ivloop
;
/* Skip absent operands.  */
1597 if (!TREE_OPERAND (expr
, i
))
1600 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1603 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1606 return superloop_at_depth (loop
, maxdepth
);
1609 /* Returns true if expression EXPR is obviously invariant in LOOP,
1610 i.e. if all its operands are defined outside of the LOOP. LOOP
1611 should not be the function body. */
/* Returns true if EXPR is obviously invariant in LOOP: constants, SSA
   names defined outside LOOP, or compound expressions whose operands
   are all recursively invariant.  LOOP must not be the function body
   (asserted).  NOTE(review): the true/false return statements were
   dropped by extraction; tokens below are byte-identical.  */
1614 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1619 gcc_assert (loop_depth (loop
) > 0);
1621 if (is_gimple_min_invariant (expr
))
1624 if (TREE_CODE (expr
) == SSA_NAME
)
1626 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
/* Condition's first conjunct lost in extraction — presumably
   "if (def_bb"; a definition inside LOOP makes EXPR variant.  */
1628 && flow_bb_inside_loop_p (loop
, def_bb
))
/* Compound expression: every present operand must be invariant.  */
1637 len
= TREE_OPERAND_LENGTH (expr
);
1638 for (i
= 0; i
< len
; i
++)
1639 if (TREE_OPERAND (expr
, i
)
1640 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1646 /* Cumulates the steps of indices into DATA and replaces their values with the
1647 initial ones. Returns false when the value of the index cannot be determined.
1648 Callback for for_each_index. */
1650 struct ifs_ivopts_data
1652 struct ivopts_data
*ivopts_data
;
1658 idx_find_step (tree base
, tree
*idx
, void *data
)
1660 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1662 bool use_overflow_semantics
= false;
1663 tree step
, iv_base
, iv_step
, lbound
, off
;
1664 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1666 /* If base is a component ref, require that the offset of the reference
1668 if (TREE_CODE (base
) == COMPONENT_REF
)
1670 off
= component_ref_field_offset (base
);
1671 return expr_invariant_in_loop_p (loop
, off
);
1674 /* If base is array, first check whether we will be able to move the
1675 reference out of the loop (in order to take its address in strength
1676 reduction). In order for this to work we need both lower bound
1677 and step to be loop invariants. */
1678 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1680 /* Moreover, for a range, the size needs to be invariant as well. */
1681 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1682 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1685 step
= array_ref_element_size (base
);
1686 lbound
= array_ref_low_bound (base
);
1688 if (!expr_invariant_in_loop_p (loop
, step
)
1689 || !expr_invariant_in_loop_p (loop
, lbound
))
1693 if (TREE_CODE (*idx
) != SSA_NAME
)
1696 iv
= get_iv (dta
->ivopts_data
, *idx
);
1700 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1701 *&x[0], which is not folded and does not trigger the
1702 ARRAY_REF path below. */
1705 if (integer_zerop (iv
->step
))
1708 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1710 step
= array_ref_element_size (base
);
1712 /* We only handle addresses whose step is an integer constant. */
1713 if (TREE_CODE (step
) != INTEGER_CST
)
1717 /* The step for pointer arithmetics already is 1 byte. */
1718 step
= size_one_node
;
1722 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1723 use_overflow_semantics
= true;
1725 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1726 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1727 use_overflow_semantics
))
1729 /* The index might wrap. */
1733 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1734 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1739 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1740 object is passed to it in DATA. */
/* Records the use of index *IDX; callback for for_each_index, with the
   ivopts_data passed through VDATA.  For array references the element
   size and lower bound are themselves potential uses/invariants and
   are recorded as well.  Tokens below are byte-identical.  */
1743 idx_record_use (tree base
, tree
*idx
,
1746 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
/* The index expression itself.  */
1747 find_interesting_uses_op (data
, *idx
);
1748 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
/* Element size and low bound may contain SSA names too.  */
1750 find_interesting_uses_op (data
, array_ref_element_size (base
));
1751 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1756 /* If we can prove that TOP = cst * BOT for some constant cst,
1757 store cst to MUL and return true. Otherwise return false.
1758 The returned value is always sign-extended, regardless of the
1759 signedness of TOP and BOT. */
1762 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1765 enum tree_code code
;
1766 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1767 widest_int res
, p0
, p1
;
1772 if (operand_equal_p (top
, bot
, 0))
1778 code
= TREE_CODE (top
);
1782 mby
= TREE_OPERAND (top
, 1);
1783 if (TREE_CODE (mby
) != INTEGER_CST
)
1786 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1789 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1794 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1795 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1798 if (code
== MINUS_EXPR
)
1800 *mul
= wi::sext (p0
+ p1
, precision
);
1804 if (TREE_CODE (bot
) != INTEGER_CST
)
1807 p0
= widest_int::from (top
, SIGNED
);
1808 p1
= widest_int::from (bot
, SIGNED
);
1811 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1819 /* Return true if memory reference REF with step STEP may be unaligned. */
/* Returns true if memory reference REF, strided by STEP, may be
   misaligned for its mode: either the object's known alignment or bit
   position is insufficient, or STEP's trailing-zero count cannot
   guarantee aligned increments.  NOTE(review): the true/false return
   lines were dropped by extraction; tokens are byte-identical.  */
1822 may_be_unaligned_p (tree ref
, tree step
)
/* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
   thus they are not misaligned.  */
1826 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
/* Required alignment: the stricter of type and mode alignment.  */
1829 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1830 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1831 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1833 unsigned HOST_WIDE_INT bitpos
;
1834 unsigned int ref_align
;
/* Known alignment and bit offset of the object itself.  */
1835 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
/* Misaligned if the object is under-aligned, or its bit position is
   not a multiple of the required alignment, or not byte-aligned.  */
1836 if (ref_align
< align
1837 || (bitpos
% align
) != 0
1838 || (bitpos
% BITS_PER_UNIT
) != 0)
/* Even an aligned base goes misaligned if the per-iteration STEP is
   not a multiple of the required alignment.  */
1841 unsigned int trailing_zeros
= tree_ctz (step
);
1842 if (trailing_zeros
< HOST_BITS_PER_INT
1843 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1849 /* Return true if EXPR may be non-addressable. */
/* Returns true if EXPR may be non-addressable, walking down component
   and view-convert chains.  NOTE(review): several switch cases and
   return lines were dropped by extraction (e.g. the case label before
   the DECL_NONADDRESSABLE_P check — presumably COMPONENT_REF; TODO
   confirm upstream).  Tokens below are byte-identical.  */
1852 may_be_nonaddressable_p (tree expr
)
1854 switch (TREE_CODE (expr
))
1856 case TARGET_MEM_REF
:
1857 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1858 target, thus they are always addressable. */
/* Field marked non-addressable, or a non-addressable base.  */
1862 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1863 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1865 case VIEW_CONVERT_EXPR
:
1866 /* This kind of view-conversions may wrap non-addressable objects
1867 and make them look addressable. After some processing the
1868 non-addressability may be uncovered again, causing ADDR_EXPRs
1869 of inappropriate objects to be built. */
1870 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1871 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1874 /* ... fall through ... */
1877 case ARRAY_RANGE_REF
:
/* Addressability is inherited from the base object.  */
1878 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1891 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
1893 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1894 If there is an existing use which has same stripped iv base and step,
1895 this function records this one as a sub use to that; otherwise records
1896 it as a normal one. */
1898 static struct iv_use
*
1899 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1900 struct iv
*iv
, gimple stmt
, enum use_type use_type
)
1905 unsigned HOST_WIDE_INT addr_offset
;
1907 /* Only support sub use for address type uses, that is, with base
1909 if (!iv
->base_object
)
1910 return record_use (data
, use_p
, iv
, stmt
, use_type
);
1912 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1913 for (i
= 0; i
< n_iv_uses (data
); i
++)
1915 use
= iv_use (data
, i
);
1916 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
1919 /* Check if it has the same stripped base and step. */
1920 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1921 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1922 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1926 if (i
== n_iv_uses (data
))
1927 return record_use (data
, use_p
, iv
, stmt
,
1928 use_type
, addr_base
, addr_offset
);
1930 return record_sub_use (data
, use_p
, iv
, stmt
,
1931 use_type
, addr_base
, addr_offset
, i
);
1934 /* Finds addresses in *OP_P inside STMT. */
1937 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1939 tree base
= *op_p
, step
= size_zero_node
;
1941 struct ifs_ivopts_data ifs_ivopts_data
;
1943 /* Do not play with volatile memory references. A bit too conservative,
1944 perhaps, but safe. */
1945 if (gimple_has_volatile_ops (stmt
))
1948 /* Ignore bitfields for now. Not really something terribly complicated
1950 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1953 base
= unshare_expr (base
);
1955 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1957 tree type
= build_pointer_type (TREE_TYPE (base
));
1961 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1963 civ
= get_iv (data
, TMR_BASE (base
));
1967 TMR_BASE (base
) = civ
->base
;
1970 if (TMR_INDEX2 (base
)
1971 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1973 civ
= get_iv (data
, TMR_INDEX2 (base
));
1977 TMR_INDEX2 (base
) = civ
->base
;
1980 if (TMR_INDEX (base
)
1981 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1983 civ
= get_iv (data
, TMR_INDEX (base
));
1987 TMR_INDEX (base
) = civ
->base
;
1992 if (TMR_STEP (base
))
1993 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1995 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1999 if (integer_zerop (step
))
2001 base
= tree_mem_ref_addr (type
, base
);
2005 ifs_ivopts_data
.ivopts_data
= data
;
2006 ifs_ivopts_data
.stmt
= stmt
;
2007 ifs_ivopts_data
.step
= size_zero_node
;
2008 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2009 || integer_zerop (ifs_ivopts_data
.step
))
2011 step
= ifs_ivopts_data
.step
;
2013 /* Check that the base expression is addressable. This needs
2014 to be done after substituting bases of IVs into it. */
2015 if (may_be_nonaddressable_p (base
))
2018 /* Moreover, on strict alignment platforms, check that it is
2019 sufficiently aligned. */
2020 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2023 base
= build_fold_addr_expr (base
);
2025 /* Substituting bases of IVs into the base expression might
2026 have caused folding opportunities. */
2027 if (TREE_CODE (base
) == ADDR_EXPR
)
2029 tree
*ref
= &TREE_OPERAND (base
, 0);
2030 while (handled_component_p (*ref
))
2031 ref
= &TREE_OPERAND (*ref
, 0);
2032 if (TREE_CODE (*ref
) == MEM_REF
)
2034 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2035 TREE_OPERAND (*ref
, 0),
2036 TREE_OPERAND (*ref
, 1));
2043 civ
= alloc_iv (base
, step
);
2044 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2048 for_each_index (op_p
, idx_record_use
, data
);
2051 /* Finds and records invariants used in STMT. */
/* Finds and records loop invariants among the SSA uses of STMT
   (works for both PHIs and ordinary statements).  All operands are
   recorded with nonlinear_use = false here; specific recording of
   nonlinear uses happens elsewhere.  Tokens are byte-identical.  */
2054 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
2057 use_operand_p use_p
;
/* Iterate over every real (non-virtual) SSA use of STMT.  */
2060 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2062 op
= USE_FROM_PTR (use_p
);
2063 record_invariant (data
, op
, false);
2067 /* Finds interesting uses of induction variables in the statement STMT. */
2070 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
2073 tree op
, *lhs
, *rhs
;
2075 use_operand_p use_p
;
2076 enum tree_code code
;
2078 find_invariants_stmt (data
, stmt
);
2080 if (gimple_code (stmt
) == GIMPLE_COND
)
2082 find_interesting_uses_cond (data
, stmt
);
2086 if (is_gimple_assign (stmt
))
2088 lhs
= gimple_assign_lhs_ptr (stmt
);
2089 rhs
= gimple_assign_rhs1_ptr (stmt
);
2091 if (TREE_CODE (*lhs
) == SSA_NAME
)
2093 /* If the statement defines an induction variable, the uses are not
2094 interesting by themselves. */
2096 iv
= get_iv (data
, *lhs
);
2098 if (iv
&& !integer_zerop (iv
->step
))
2102 code
= gimple_assign_rhs_code (stmt
);
2103 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2104 && (REFERENCE_CLASS_P (*rhs
)
2105 || is_gimple_val (*rhs
)))
2107 if (REFERENCE_CLASS_P (*rhs
))
2108 find_interesting_uses_address (data
, stmt
, rhs
);
2110 find_interesting_uses_op (data
, *rhs
);
2112 if (REFERENCE_CLASS_P (*lhs
))
2113 find_interesting_uses_address (data
, stmt
, lhs
);
2116 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2118 find_interesting_uses_cond (data
, stmt
);
2122 /* TODO -- we should also handle address uses of type
2124 memory = call (whatever);
2131 if (gimple_code (stmt
) == GIMPLE_PHI
2132 && gimple_bb (stmt
) == data
->current_loop
->header
)
2134 iv
= get_iv (data
, PHI_RESULT (stmt
));
2136 if (iv
&& !integer_zerop (iv
->step
))
2140 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2142 op
= USE_FROM_PTR (use_p
);
2144 if (TREE_CODE (op
) != SSA_NAME
)
2147 iv
= get_iv (data
, op
);
2151 find_interesting_uses_op (data
, op
);
2155 /* Finds interesting uses of induction variables outside of loops
2156 on loop exit edge EXIT. */
/* Finds interesting uses of induction variables outside of the loop,
   i.e. values live across loop exit edge EXIT: each non-virtual PHI
   argument flowing along EXIT into the destination block is recorded
   as a use.  Tokens are byte-identical.  */
2159 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
/* Walk the PHIs of the exit destination block.  */
2165 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
/* The value that leaves the loop along EXIT.  */
2168 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2169 if (!virtual_operand_p (def
))
2170 find_interesting_uses_op (data
, def
);
2174 /* Finds uses of the induction variables that are interesting. */
2177 find_interesting_uses (struct ivopts_data
*data
)
2180 gimple_stmt_iterator bsi
;
2181 basic_block
*body
= get_loop_body (data
->current_loop
);
2183 struct version_info
*info
;
2186 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2187 fprintf (dump_file
, "Uses:\n\n");
2189 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2194 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2195 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2196 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2197 find_interesting_uses_outside (data
, e
);
2199 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2200 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2201 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2202 if (!is_gimple_debug (gsi_stmt (bsi
)))
2203 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2206 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2210 fprintf (dump_file
, "\n");
2212 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2214 info
= ver_info (data
, i
);
2217 fprintf (dump_file
, " ");
2218 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2219 fprintf (dump_file
, " is invariant (%d)%s\n",
2220 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2224 fprintf (dump_file
, "\n");
2230 /* Compute maximum offset of [base + offset] addressing mode
2231 for memory reference represented by USE. */
2233 static HOST_WIDE_INT
2234 compute_max_addr_offset (struct iv_use
*use
)
2238 HOST_WIDE_INT i
, off
;
2239 unsigned list_index
, num
;
2241 machine_mode mem_mode
, addr_mode
;
2242 static vec
<HOST_WIDE_INT
> max_offset_list
;
2244 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2245 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2247 num
= max_offset_list
.length ();
2248 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2249 if (list_index
>= num
)
2251 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2252 for (; num
< max_offset_list
.length (); num
++)
2253 max_offset_list
[num
] = -1;
2256 off
= max_offset_list
[list_index
];
2260 addr_mode
= targetm
.addr_space
.address_mode (as
);
2261 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2262 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2264 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2265 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2266 width
= HOST_BITS_PER_WIDE_INT
- 1;
2268 for (i
= width
; i
> 0; i
--)
2270 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2271 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2272 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2275 /* For some strict-alignment targets, the offset must be naturally
2276 aligned. Try an aligned offset if mem_mode is not QImode. */
2277 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2278 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2280 off
-= GET_MODE_SIZE (mem_mode
);
2281 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2282 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2289 max_offset_list
[list_index
] = off
;
2293 /* Check if all small groups should be split. Return true if and
2296 1) At least one groups contain two uses with different offsets.
2297 2) No group contains more than two uses with different offsets.
2299 Return false otherwise. We want to split such groups because:
2301 1) Small groups don't have much benefit and may interfer with
2302 general candidate selection.
2303 2) Size for problem with only small groups is usually small and
2304 general algorithm can handle it well.
2306 TODO -- Above claim may not hold when auto increment is supported. */
/* Decides whether all small address-use groups should be split (see
   the rationale comment preceding this function in the file): true
   requires at least one group with two distinct offsets and no group
   with more than two distinct offsets.  NOTE(review): the distinct-
   counting statements and final return were dropped by extraction;
   tokens below are byte-identical.  */
2309 split_all_small_groups (struct ivopts_data
*data
)
2311 bool split_p
= false;
2312 unsigned int i
, n
, distinct
;
2313 struct iv_use
*pre
, *use
;
/* Examine every recorded use group.  */
2315 n
= n_iv_uses (data
)
;
2316 for (i
= 0; i
< n
; i
++)
2318 use
= iv_use (data
, i
)
;
/* Grouping only happens for address-type uses.  */
2323 gcc_assert (use
->type
== USE_ADDRESS
);
/* Walk the sub-use chain, comparing consecutive offsets.  */
2324 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2326 if (pre
->addr_offset
!= use
->addr_offset
)
2339 /* For each group of address type uses, this function further groups
2340 these uses according to the maximum offset supported by target's
2341 [base + offset] addressing mode. */
2344 group_address_uses (struct ivopts_data
*data
)
2346 HOST_WIDE_INT max_offset
= -1;
2347 unsigned int i
, n
, sub_id
;
2348 struct iv_use
*pre
, *use
;
2349 unsigned HOST_WIDE_INT addr_offset_first
;
2351 /* Reset max offset to split all small groups. */
2352 if (split_all_small_groups (data
))
2355 n
= n_iv_uses (data
);
2356 for (i
= 0; i
< n
; i
++)
2358 use
= iv_use (data
, i
);
2362 gcc_assert (use
->type
== USE_ADDRESS
);
2363 if (max_offset
!= 0)
2364 max_offset
= compute_max_addr_offset (use
);
2369 addr_offset_first
= use
->addr_offset
;
2370 /* Only uses with offset that can fit in offset part against
2371 the first use can be grouped together. */
2372 for (pre
= use
, use
= use
->next
;
2373 use
&& (use
->addr_offset
- addr_offset_first
2374 <= (unsigned HOST_WIDE_INT
) max_offset
);
2375 pre
= use
, use
= use
->next
)
2378 use
->sub_id
= ++sub_id
;
2381 /* Break the list and create new group. */
2385 use
->id
= n_iv_uses (data
);
2386 use
->related_cands
= BITMAP_ALLOC (NULL
);
2387 data
->iv_uses
.safe_push (use
);
2392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2393 dump_uses (dump_file
, data
);
2396 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2397 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2398 we are at the top-level of the processed address. */
2401 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2402 HOST_WIDE_INT
*offset
)
2404 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2405 enum tree_code code
;
2406 tree type
, orig_type
= TREE_TYPE (expr
);
2407 HOST_WIDE_INT off0
, off1
, st
;
2408 tree orig_expr
= expr
;
2412 type
= TREE_TYPE (expr
);
2413 code
= TREE_CODE (expr
);
2419 if (!cst_and_fits_in_hwi (expr
)
2420 || integer_zerop (expr
))
2423 *offset
= int_cst_value (expr
);
2424 return build_int_cst (orig_type
, 0);
2426 case POINTER_PLUS_EXPR
:
2429 op0
= TREE_OPERAND (expr
, 0);
2430 op1
= TREE_OPERAND (expr
, 1);
2432 op0
= strip_offset_1 (op0
, false, false, &off0
);
2433 op1
= strip_offset_1 (op1
, false, false, &off1
);
2435 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2436 if (op0
== TREE_OPERAND (expr
, 0)
2437 && op1
== TREE_OPERAND (expr
, 1))
2440 if (integer_zerop (op1
))
2442 else if (integer_zerop (op0
))
2444 if (code
== MINUS_EXPR
)
2445 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2450 expr
= fold_build2 (code
, type
, op0
, op1
);
2452 return fold_convert (orig_type
, expr
);
2455 op1
= TREE_OPERAND (expr
, 1);
2456 if (!cst_and_fits_in_hwi (op1
))
2459 op0
= TREE_OPERAND (expr
, 0);
2460 op0
= strip_offset_1 (op0
, false, false, &off0
);
2461 if (op0
== TREE_OPERAND (expr
, 0))
2464 *offset
= off0
* int_cst_value (op1
);
2465 if (integer_zerop (op0
))
2468 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2470 return fold_convert (orig_type
, expr
);
2473 case ARRAY_RANGE_REF
:
2477 step
= array_ref_element_size (expr
);
2478 if (!cst_and_fits_in_hwi (step
))
2481 st
= int_cst_value (step
);
2482 op1
= TREE_OPERAND (expr
, 1);
2483 op1
= strip_offset_1 (op1
, false, false, &off1
);
2484 *offset
= off1
* st
;
2487 && integer_zerop (op1
))
2489 /* Strip the component reference completely. */
2490 op0
= TREE_OPERAND (expr
, 0);
2491 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2504 tmp
= component_ref_field_offset (expr
);
2505 field
= TREE_OPERAND (expr
, 1);
2507 && cst_and_fits_in_hwi (tmp
)
2508 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2510 HOST_WIDE_INT boffset
, abs_off
;
2512 /* Strip the component reference completely. */
2513 op0
= TREE_OPERAND (expr
, 0);
2514 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2515 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2516 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2520 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2527 op0
= TREE_OPERAND (expr
, 0);
2528 op0
= strip_offset_1 (op0
, true, true, &off0
);
2531 if (op0
== TREE_OPERAND (expr
, 0))
2534 expr
= build_fold_addr_expr (op0
);
2535 return fold_convert (orig_type
, expr
);
2538 /* ??? Offset operand? */
2539 inside_addr
= false;
2546 /* Default handling of expressions for that we want to recurse into
2547 the first operand. */
2548 op0
= TREE_OPERAND (expr
, 0);
2549 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2552 if (op0
== TREE_OPERAND (expr
, 0)
2553 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2556 expr
= copy_node (expr
);
2557 TREE_OPERAND (expr
, 0) = op0
;
2559 TREE_OPERAND (expr
, 1) = op1
;
2561 /* Inside address, we might strip the top level component references,
2562 thus changing type of the expression. Handling of ADDR_EXPR
2564 expr
= fold_convert (orig_type
, expr
);
2569 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* Strips constant offsets from EXPR, storing them to *OFFSET and
   returning the stripped core expression; thin wrapper over
   strip_offset_1 with inside_addr = top_compref = false.
   NOTE(review): the local 'off' declaration and the assignment of
   *OFFSET / return were dropped by extraction; tokens below are
   byte-identical.  */
2572 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2575 tree core
= strip_offset_1 (expr
, false, false, &off
);
2580 /* Returns variant of TYPE that can be used as base for different uses.
2581 We return unsigned type with the same precision, which avoids problems
/* Returns a variant of TYPE usable as a common base type for different
   uses: pointers and signed types are mapped to the unsigned type of
   the same precision (avoiding signed-overflow UB); the branch that
   returns TYPE unchanged for already-unsigned types was dropped by
   extraction.  Tokens below are byte-identical.  */
2585 generic_type_for (tree type
)
/* Pointers: use the matching unsigned integer type.  */
2587 if (POINTER_TYPE_P (type
))
2588 return unsigned_type_for (type
);
2590 if (TYPE_UNSIGNED (type
))
/* Signed integer: switch to its unsigned counterpart.  */
2593 return unsigned_type_for (type
);
2596 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2597 the bitmap to that we should store it. */
2599 static struct ivopts_data
*fd_ivopts_data
;
/* walk_tree callback recording, into the bitmap passed via DATA, the
   invariant ids of SSA names that *EXPR_P depends on.  Uses the
   file-scope fd_ivopts_data to map names to version_info.  The bitmap
   is lazily allocated on first hit (the "if (!*depends_on)" guard was
   dropped by extraction — TODO confirm upstream).  Tokens below are
   byte-identical.  */
2601 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2603 bitmap
*depends_on
= (bitmap
*) data
;
2604 struct version_info
*info
;
/* Only SSA names carry invariant information.  */
2606 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2608 info
= name_info (fd_ivopts_data
, *expr_p
);
/* Not a recorded invariant, or already consumed nonlinearly.  */
2610 if (!info
->inv_id
|| info
->has_nonlin_use
)
/* Lazily allocate the dependence bitmap.  */
2614 *depends_on
= BITMAP_ALLOC (NULL
);
2615 bitmap_set_bit (*depends_on
, info
->inv_id
);
2620 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2621 position to POS. If USE is not NULL, the candidate is set as related to
2622 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2623 replacement of the final value of the iv by a direct computation. */
2625 static struct iv_cand
*
2626 add_candidate_1 (struct ivopts_data
*data
,
2627 tree base
, tree step
, bool important
, enum iv_position pos
,
2628 struct iv_use
*use
, gimple incremented_at
)
2631 struct iv_cand
*cand
= NULL
;
2632 tree type
, orig_type
;
2634 /* For non-original variables, make sure their values are computed in a type
2635 that does not invoke undefined behavior on overflows (since in general,
2636 we cannot prove that these induction variables are non-wrapping). */
2637 if (pos
!= IP_ORIGINAL
)
2639 orig_type
= TREE_TYPE (base
);
2640 type
= generic_type_for (orig_type
);
2641 if (type
!= orig_type
)
2643 base
= fold_convert (type
, base
);
2644 step
= fold_convert (type
, step
);
2648 for (i
= 0; i
< n_iv_cands (data
); i
++)
2650 cand
= iv_cand (data
, i
);
2652 if (cand
->pos
!= pos
)
2655 if (cand
->incremented_at
!= incremented_at
2656 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2657 && cand
->ainc_use
!= use
))
2671 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2672 && operand_equal_p (step
, cand
->iv
->step
, 0)
2673 && (TYPE_PRECISION (TREE_TYPE (base
))
2674 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2678 if (i
== n_iv_cands (data
))
2680 cand
= XCNEW (struct iv_cand
);
2686 cand
->iv
= alloc_iv (base
, step
);
2689 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2691 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2692 cand
->var_after
= cand
->var_before
;
2694 cand
->important
= important
;
2695 cand
->incremented_at
= incremented_at
;
2696 data
->iv_candidates
.safe_push (cand
);
2699 && TREE_CODE (step
) != INTEGER_CST
)
2701 fd_ivopts_data
= data
;
2702 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2705 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2706 cand
->ainc_use
= use
;
2708 cand
->ainc_use
= NULL
;
2710 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2711 dump_cand (dump_file
, cand
);
2714 if (important
&& !cand
->important
)
2716 cand
->important
= true;
2717 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2718 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2723 bitmap_set_bit (use
->related_cands
, i
);
2724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2725 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2732 /* Returns true if incrementing the induction variable at the end of the LOOP
2735 The purpose is to avoid splitting latch edge with a biv increment, thus
2736 creating a jump, possibly confusing other optimization passes and leaving
2737 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2738 is not available (so we do not have a better alternative), or if the latch
2739 edge is already nonempty. */
/* Returns whether placing an iv increment at the end of LOOP (IP_END)
   is acceptable: only when the normal position is unavailable or the
   latch edge is already nonempty, per the rationale comment preceding
   this function.  The return statements between/after the two guards
   were dropped by extraction; tokens below are byte-identical.  */
2742 allow_ip_end_pos_p (struct loop
*loop
)
/* No normal increment position exists: IP_END is the fallback.  */
2744 if (!ip_normal_pos (loop
))
/* Latch block already nonempty: no new jump would be created.  */
2747 if (!empty_block_p (ip_end_pos (loop
)))
2753 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2754 Important field is set to IMPORTANT. */
2757 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2758 bool important
, struct iv_use
*use
)
2760 basic_block use_bb
= gimple_bb (use
->stmt
);
2761 machine_mode mem_mode
;
2762 unsigned HOST_WIDE_INT cstepi
;
2764 /* If we insert the increment in any position other than the standard
2765 ones, we must ensure that it is incremented once per iteration.
2766 It must not be in an inner nested loop, or one side of an if
2768 if (use_bb
->loop_father
!= data
->current_loop
2769 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2770 || stmt_could_throw_p (use
->stmt
)
2771 || !cst_and_fits_in_hwi (step
))
2774 cstepi
= int_cst_value (step
);
2776 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2777 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2778 || USE_STORE_PRE_INCREMENT (mem_mode
))
2779 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2780 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2781 || USE_STORE_PRE_DECREMENT (mem_mode
))
2782 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2784 enum tree_code code
= MINUS_EXPR
;
2786 tree new_step
= step
;
2788 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2790 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2791 code
= POINTER_PLUS_EXPR
;
2794 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2795 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2796 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2799 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2800 || USE_STORE_POST_INCREMENT (mem_mode
))
2801 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2802 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2803 || USE_STORE_POST_DECREMENT (mem_mode
))
2804 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2806 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2811 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2812 position to POS. If USE is not NULL, the candidate is set as related to
2813 it. The candidate computation is scheduled on all available positions. */
2816 add_candidate (struct ivopts_data
*data
,
2817 tree base
, tree step
, bool important
, struct iv_use
*use
)
2819 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2821 if (ip_normal_pos (data
->current_loop
))
2822 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2823 if (ip_end_pos (data
->current_loop
)
2824 && allow_ip_end_pos_p (data
->current_loop
))
2825 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2827 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2828 add_autoinc_candidates (data
, base
, step
, important
, use
);
2831 /* Adds standard iv candidates. */
2834 add_standard_iv_candidates (struct ivopts_data
*data
)
2836 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2838 /* The same for a double-integer type if it is still fast enough. */
2840 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2841 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2842 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2843 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2845 /* The same for a double-integer type if it is still fast enough. */
2847 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2848 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2849 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2850 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2854 /* Adds candidates bases on the old induction variable IV. */
2857 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2861 struct iv_cand
*cand
;
2863 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2865 /* The same, but with initial value zero. */
2866 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2867 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2869 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2870 iv
->step
, true, NULL
);
2872 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2873 if (gimple_code (phi
) == GIMPLE_PHI
)
2875 /* Additionally record the possibility of leaving the original iv
2877 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2878 /* Don't add candidate if it's from another PHI node because
2879 it's an affine iv appearing in the form of PEELED_CHREC. */
2880 phi
= SSA_NAME_DEF_STMT (def
);
2881 if (gimple_code (phi
) != GIMPLE_PHI
)
2883 cand
= add_candidate_1 (data
,
2884 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2885 SSA_NAME_DEF_STMT (def
));
2886 cand
->var_before
= iv
->ssa_name
;
2887 cand
->var_after
= def
;
2890 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2894 /* Adds candidates based on the old induction variables. */
2897 add_old_ivs_candidates (struct ivopts_data
*data
)
2903 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2905 iv
= ver_info (data
, i
)->iv
;
2906 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2907 add_old_iv_candidates (data
, iv
);
2911 /* Adds candidates based on the value of the induction variable IV and USE. */
2914 add_iv_value_candidates (struct ivopts_data
*data
,
2915 struct iv
*iv
, struct iv_use
*use
)
2917 unsigned HOST_WIDE_INT offset
;
2921 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2923 /* The same, but with initial value zero. Make such variable important,
2924 since it is generic enough so that possibly many uses may be based
2926 basetype
= TREE_TYPE (iv
->base
);
2927 if (POINTER_TYPE_P (basetype
))
2928 basetype
= sizetype
;
2929 add_candidate (data
, build_int_cst (basetype
, 0),
2930 iv
->step
, true, use
);
2932 /* Third, try removing the constant offset. Make sure to even
2933 add a candidate for &a[0] vs. (T *)&a. */
2934 base
= strip_offset (iv
->base
, &offset
);
2936 || base
!= iv
->base
)
2937 add_candidate (data
, base
, iv
->step
, false, use
);
2940 /* Adds candidates based on the uses. */
2943 add_derived_ivs_candidates (struct ivopts_data
*data
)
2947 for (i
= 0; i
< n_iv_uses (data
); i
++)
2949 struct iv_use
*use
= iv_use (data
, i
);
2956 case USE_NONLINEAR_EXPR
:
2959 /* Just add the ivs based on the value of the iv used here. */
2960 add_iv_value_candidates (data
, use
->iv
, use
);
2969 /* Record important candidates and add them to related_cands bitmaps
2973 record_important_candidates (struct ivopts_data
*data
)
2978 for (i
= 0; i
< n_iv_cands (data
); i
++)
2980 struct iv_cand
*cand
= iv_cand (data
, i
);
2982 if (cand
->important
)
2983 bitmap_set_bit (data
->important_candidates
, i
);
2986 data
->consider_all_candidates
= (n_iv_cands (data
)
2987 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2989 if (data
->consider_all_candidates
)
2991 /* We will not need "related_cands" bitmaps in this case,
2992 so release them to decrease peak memory consumption. */
2993 for (i
= 0; i
< n_iv_uses (data
); i
++)
2995 use
= iv_use (data
, i
);
2996 BITMAP_FREE (use
->related_cands
);
3001 /* Add important candidates to the related_cands bitmaps. */
3002 for (i
= 0; i
< n_iv_uses (data
); i
++)
3003 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
3004 data
->important_candidates
);
3008 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3009 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3010 we allocate a simple list to every use. */
3013 alloc_use_cost_map (struct ivopts_data
*data
)
3015 unsigned i
, size
, s
;
3017 for (i
= 0; i
< n_iv_uses (data
); i
++)
3019 struct iv_use
*use
= iv_use (data
, i
);
3021 if (data
->consider_all_candidates
)
3022 size
= n_iv_cands (data
);
3025 s
= bitmap_count_bits (use
->related_cands
);
3027 /* Round up to the power of two, so that moduling by it is fast. */
3028 size
= s
? (1 << ceil_log2 (s
)) : 1;
3031 use
->n_map_members
= size
;
3032 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3036 /* Returns description of computation cost of expression whose runtime
3037 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3040 new_cost (unsigned runtime
, unsigned complexity
)
3044 cost
.cost
= runtime
;
3045 cost
.complexity
= complexity
;
3050 /* Returns true if COST is infinite. */
3053 infinite_cost_p (comp_cost cost
)
3055 return cost
.cost
== INFTY
;
3058 /* Adds costs COST1 and COST2. */
3061 add_costs (comp_cost cost1
, comp_cost cost2
)
3063 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3064 return infinite_cost
;
3066 cost1
.cost
+= cost2
.cost
;
3067 cost1
.complexity
+= cost2
.complexity
;
3071 /* Subtracts costs COST1 and COST2. */
3074 sub_costs (comp_cost cost1
, comp_cost cost2
)
3076 cost1
.cost
-= cost2
.cost
;
3077 cost1
.complexity
-= cost2
.complexity
;
3082 /* Returns a negative number if COST1 < COST2, a positive number if
3083 COST1 > COST2, and 0 if COST1 = COST2. */
3086 compare_costs (comp_cost cost1
, comp_cost cost2
)
3088 if (cost1
.cost
== cost2
.cost
)
3089 return cost1
.complexity
- cost2
.complexity
;
3091 return cost1
.cost
- cost2
.cost
;
3094 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3095 on invariants DEPENDS_ON and that the value used in expressing it
3096 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3099 set_use_iv_cost (struct ivopts_data
*data
,
3100 struct iv_use
*use
, struct iv_cand
*cand
,
3101 comp_cost cost
, bitmap depends_on
, tree value
,
3102 enum tree_code comp
, int inv_expr_id
)
3106 if (infinite_cost_p (cost
))
3108 BITMAP_FREE (depends_on
);
3112 if (data
->consider_all_candidates
)
3114 use
->cost_map
[cand
->id
].cand
= cand
;
3115 use
->cost_map
[cand
->id
].cost
= cost
;
3116 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3117 use
->cost_map
[cand
->id
].value
= value
;
3118 use
->cost_map
[cand
->id
].comp
= comp
;
3119 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3123 /* n_map_members is a power of two, so this computes modulo. */
3124 s
= cand
->id
& (use
->n_map_members
- 1);
3125 for (i
= s
; i
< use
->n_map_members
; i
++)
3126 if (!use
->cost_map
[i
].cand
)
3128 for (i
= 0; i
< s
; i
++)
3129 if (!use
->cost_map
[i
].cand
)
3135 use
->cost_map
[i
].cand
= cand
;
3136 use
->cost_map
[i
].cost
= cost
;
3137 use
->cost_map
[i
].depends_on
= depends_on
;
3138 use
->cost_map
[i
].value
= value
;
3139 use
->cost_map
[i
].comp
= comp
;
3140 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3143 /* Gets cost of (USE, CANDIDATE) pair. */
3145 static struct cost_pair
*
3146 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3147 struct iv_cand
*cand
)
3150 struct cost_pair
*ret
;
3155 if (data
->consider_all_candidates
)
3157 ret
= use
->cost_map
+ cand
->id
;
3164 /* n_map_members is a power of two, so this computes modulo. */
3165 s
= cand
->id
& (use
->n_map_members
- 1);
3166 for (i
= s
; i
< use
->n_map_members
; i
++)
3167 if (use
->cost_map
[i
].cand
== cand
)
3168 return use
->cost_map
+ i
;
3169 else if (use
->cost_map
[i
].cand
== NULL
)
3171 for (i
= 0; i
< s
; i
++)
3172 if (use
->cost_map
[i
].cand
== cand
)
3173 return use
->cost_map
+ i
;
3174 else if (use
->cost_map
[i
].cand
== NULL
)
3180 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3182 produce_memory_decl_rtl (tree obj
, int *regno
)
3184 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3185 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3189 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3191 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3192 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3193 SET_SYMBOL_REF_DECL (x
, obj
);
3194 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3195 set_mem_addr_space (x
, as
);
3196 targetm
.encode_section_info (obj
, x
, true);
3200 x
= gen_raw_REG (address_mode
, (*regno
)++);
3201 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3202 set_mem_addr_space (x
, as
);
3208 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3209 walk_tree. DATA contains the actual fake register number. */
3212 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3214 tree obj
= NULL_TREE
;
3216 int *regno
= (int *) data
;
3218 switch (TREE_CODE (*expr_p
))
3221 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3222 handled_component_p (*expr_p
);
3223 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3226 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3227 x
= produce_memory_decl_rtl (obj
, regno
);
3232 obj
= SSA_NAME_VAR (*expr_p
);
3233 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3236 if (!DECL_RTL_SET_P (obj
))
3237 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3246 if (DECL_RTL_SET_P (obj
))
3249 if (DECL_MODE (obj
) == BLKmode
)
3250 x
= produce_memory_decl_rtl (obj
, regno
);
3252 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3262 decl_rtl_to_reset
.safe_push (obj
);
3263 SET_DECL_RTL (obj
, x
);
3269 /* Determines cost of the computation of EXPR. */
3272 computation_cost (tree expr
, bool speed
)
3276 tree type
= TREE_TYPE (expr
);
3278 /* Avoid using hard regs in ways which may be unsupported. */
3279 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3280 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3281 enum node_frequency real_frequency
= node
->frequency
;
3283 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3284 crtl
->maybe_hot_insn_p
= speed
;
3285 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3287 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3290 default_rtl_profile ();
3291 node
->frequency
= real_frequency
;
3293 cost
= seq_cost (seq
, speed
);
3295 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3296 TYPE_ADDR_SPACE (type
), speed
);
3297 else if (!REG_P (rslt
))
3298 cost
+= set_src_cost (rslt
, speed
);
3303 /* Returns variable containing the value of candidate CAND at statement AT. */
3306 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3308 if (stmt_after_increment (loop
, cand
, stmt
))
3309 return cand
->var_after
;
3311 return cand
->var_before
;
3314 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3315 same precision that is at least as wide as the precision of TYPE, stores
3316 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3320 determine_common_wider_type (tree
*a
, tree
*b
)
3322 tree wider_type
= NULL
;
3324 tree atype
= TREE_TYPE (*a
);
3326 if (CONVERT_EXPR_P (*a
))
3328 suba
= TREE_OPERAND (*a
, 0);
3329 wider_type
= TREE_TYPE (suba
);
3330 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3336 if (CONVERT_EXPR_P (*b
))
3338 subb
= TREE_OPERAND (*b
, 0);
3339 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3350 /* Determines the expression by that USE is expressed from induction variable
3351 CAND at statement AT in LOOP. The expression is stored in a decomposed
3352 form into AFF. Returns false if USE cannot be expressed using CAND. */
3355 get_computation_aff (struct loop
*loop
,
3356 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3357 struct aff_tree
*aff
)
3359 tree ubase
= use
->iv
->base
;
3360 tree ustep
= use
->iv
->step
;
3361 tree cbase
= cand
->iv
->base
;
3362 tree cstep
= cand
->iv
->step
, cstep_common
;
3363 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3364 tree common_type
, var
;
3366 aff_tree cbase_aff
, var_aff
;
3369 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3371 /* We do not have a precision to express the values of use. */
3375 var
= var_at_stmt (loop
, cand
, at
);
3376 uutype
= unsigned_type_for (utype
);
3378 /* If the conversion is not noop, perform it. */
3379 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3381 cstep
= fold_convert (uutype
, cstep
);
3382 cbase
= fold_convert (uutype
, cbase
);
3383 var
= fold_convert (uutype
, var
);
3386 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3389 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3390 type, we achieve better folding by computing their difference in this
3391 wider type, and cast the result to UUTYPE. We do not need to worry about
3392 overflows, as all the arithmetics will in the end be performed in UUTYPE
3394 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3396 /* use = ubase - ratio * cbase + ratio * var. */
3397 tree_to_aff_combination (ubase
, common_type
, aff
);
3398 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3399 tree_to_aff_combination (var
, uutype
, &var_aff
);
3401 /* We need to shift the value if we are after the increment. */
3402 if (stmt_after_increment (loop
, cand
, at
))
3406 if (common_type
!= uutype
)
3407 cstep_common
= fold_convert (common_type
, cstep
);
3409 cstep_common
= cstep
;
3411 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3412 aff_combination_add (&cbase_aff
, &cstep_aff
);
3415 aff_combination_scale (&cbase_aff
, -rat
);
3416 aff_combination_add (aff
, &cbase_aff
);
3417 if (common_type
!= uutype
)
3418 aff_combination_convert (aff
, uutype
);
3420 aff_combination_scale (&var_aff
, rat
);
3421 aff_combination_add (aff
, &var_aff
);
3426 /* Return the type of USE. */
3429 get_use_type (struct iv_use
*use
)
3431 tree base_type
= TREE_TYPE (use
->iv
->base
);
3434 if (use
->type
== USE_ADDRESS
)
3436 /* The base_type may be a void pointer. Create a pointer type based on
3437 the mem_ref instead. */
3438 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3439 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3440 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3448 /* Determines the expression by that USE is expressed from induction variable
3449 CAND at statement AT in LOOP. The computation is unshared. */
3452 get_computation_at (struct loop
*loop
,
3453 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3456 tree type
= get_use_type (use
);
3458 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3460 unshare_aff_combination (&aff
);
3461 return fold_convert (type
, aff_combination_to_tree (&aff
));
3464 /* Determines the expression by that USE is expressed from induction variable
3465 CAND in LOOP. The computation is unshared. */
3468 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3470 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3473 /* Adjust the cost COST for being in loop setup rather than loop body.
3474 If we're optimizing for space, the loop setup overhead is constant;
3475 if we're optimizing for speed, amortize it over the per-iteration cost. */
3477 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3481 else if (optimize_loop_for_speed_p (data
->current_loop
))
3482 return cost
/ avg_loop_niter (data
->current_loop
);
3487 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3488 validity for a memory reference accessing memory of mode MODE in
3489 address space AS. */
3493 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3496 #define MAX_RATIO 128
3497 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3498 static vec
<sbitmap
> valid_mult_list
;
3501 if (data_index
>= valid_mult_list
.length ())
3502 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3504 valid_mult
= valid_mult_list
[data_index
];
3507 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3508 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3509 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3513 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3514 bitmap_clear (valid_mult
);
3515 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3516 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3517 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3519 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3520 if (memory_address_addr_space_p (mode
, addr
, as
)
3521 || memory_address_addr_space_p (mode
, scaled
, as
))
3522 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3525 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3527 fprintf (dump_file
, " allowed multipliers:");
3528 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3529 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3530 fprintf (dump_file
, " %d", (int) i
);
3531 fprintf (dump_file
, "\n");
3532 fprintf (dump_file
, "\n");
3535 valid_mult_list
[data_index
] = valid_mult
;
3538 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3541 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

/* The kinds of auto-increment addressing considered below.  */
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* Cached per-(mode, address-space) address cost data.  The costs array is
   indexed by [symbol_present][var_present][offset_present][ratio_present].  */
typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;
3576 get_address_cost (bool symbol_present
, bool var_present
,
3577 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3578 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3579 addr_space_t as
, bool speed
,
3580 bool stmt_after_inc
, bool *may_autoinc
)
3582 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3583 static vec
<address_cost_data
> address_cost_data_list
;
3584 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3585 address_cost_data data
;
3586 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3587 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3588 unsigned cost
, acost
, complexity
;
3589 enum ainc_type autoinc_type
;
3590 bool offset_p
, ratio_p
, autoinc
;
3591 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3592 unsigned HOST_WIDE_INT mask
;
3595 if (data_index
>= address_cost_data_list
.length ())
3596 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3598 data
= address_cost_data_list
[data_index
];
3602 HOST_WIDE_INT rat
, off
= 0;
3603 int old_cse_not_expected
, width
;
3604 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3609 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3611 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3613 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3614 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3615 width
= HOST_BITS_PER_WIDE_INT
- 1;
3616 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3618 for (i
= width
; i
>= 0; i
--)
3620 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3621 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3622 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3625 data
->min_offset
= (i
== -1? 0 : off
);
3627 for (i
= width
; i
>= 0; i
--)
3629 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3630 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3631 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3633 /* For some strict-alignment targets, the offset must be naturally
3634 aligned. Try an aligned offset if mem_mode is not QImode. */
3635 off
= mem_mode
!= QImode
3636 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3637 - GET_MODE_SIZE (mem_mode
)
3641 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3642 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3648 data
->max_offset
= off
;
3650 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3652 fprintf (dump_file
, "get_address_cost:\n");
3653 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3654 GET_MODE_NAME (mem_mode
),
3656 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3657 GET_MODE_NAME (mem_mode
),
3662 for (i
= 2; i
<= MAX_RATIO
; i
++)
3663 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3669 /* Compute the cost of various addressing modes. */
3671 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3672 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3674 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3675 || USE_STORE_PRE_DECREMENT (mem_mode
))
3677 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3678 has_predec
[mem_mode
]
3679 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3681 if (has_predec
[mem_mode
])
3682 data
->ainc_costs
[AINC_PRE_DEC
]
3683 = address_cost (addr
, mem_mode
, as
, speed
);
3685 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3686 || USE_STORE_POST_DECREMENT (mem_mode
))
3688 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3689 has_postdec
[mem_mode
]
3690 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3692 if (has_postdec
[mem_mode
])
3693 data
->ainc_costs
[AINC_POST_DEC
]
3694 = address_cost (addr
, mem_mode
, as
, speed
);
3696 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3697 || USE_STORE_PRE_DECREMENT (mem_mode
))
3699 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3700 has_preinc
[mem_mode
]
3701 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3703 if (has_preinc
[mem_mode
])
3704 data
->ainc_costs
[AINC_PRE_INC
]
3705 = address_cost (addr
, mem_mode
, as
, speed
);
3707 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3708 || USE_STORE_POST_INCREMENT (mem_mode
))
3710 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3711 has_postinc
[mem_mode
]
3712 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3714 if (has_postinc
[mem_mode
])
3715 data
->ainc_costs
[AINC_POST_INC
]
3716 = address_cost (addr
, mem_mode
, as
, speed
);
3718 for (i
= 0; i
< 16; i
++)
3721 var_p
= (i
>> 1) & 1;
3722 off_p
= (i
>> 2) & 1;
3723 rat_p
= (i
>> 3) & 1;
3727 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3728 gen_int_mode (rat
, address_mode
));
3731 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3735 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3736 /* ??? We can run into trouble with some backends by presenting
3737 it with symbols which haven't been properly passed through
3738 targetm.encode_section_info. By setting the local bit, we
3739 enhance the probability of things working. */
3740 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3743 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3745 (PLUS
, address_mode
, base
,
3746 gen_int_mode (off
, address_mode
)));
3749 base
= gen_int_mode (off
, address_mode
);
3754 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3757 /* To avoid splitting addressing modes, pretend that no cse will
3759 old_cse_not_expected
= cse_not_expected
;
3760 cse_not_expected
= true;
3761 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3762 cse_not_expected
= old_cse_not_expected
;
3766 acost
= seq_cost (seq
, speed
);
3767 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3771 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3774 /* On some targets, it is quite expensive to load symbol to a register,
3775 which makes addresses that contain symbols look much more expensive.
3776 However, the symbol will have to be loaded in any case before the
3777 loop (and quite likely we have it in register already), so it does not
3778 make much sense to penalize them too heavily. So make some final
3779 tweaks for the SYMBOL_PRESENT modes:
3781 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3782 var is cheaper, use this mode with small penalty.
3783 If VAR_PRESENT is true, try whether the mode with
3784 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3785 if this is the case, use it. */
3786 add_c
= add_cost (speed
, address_mode
);
3787 for (i
= 0; i
< 8; i
++)
3790 off_p
= (i
>> 1) & 1;
3791 rat_p
= (i
>> 2) & 1;
3793 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3797 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3798 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3803 fprintf (dump_file
, "Address costs:\n");
3805 for (i
= 0; i
< 16; i
++)
3808 var_p
= (i
>> 1) & 1;
3809 off_p
= (i
>> 2) & 1;
3810 rat_p
= (i
>> 3) & 1;
3812 fprintf (dump_file
, " ");
3814 fprintf (dump_file
, "sym + ");
3816 fprintf (dump_file
, "var + ");
3818 fprintf (dump_file
, "cst + ");
3820 fprintf (dump_file
, "rat * ");
3822 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3823 fprintf (dump_file
, "index costs %d\n", acost
);
3825 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3826 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3827 fprintf (dump_file
, " May include autoinc/dec\n");
3828 fprintf (dump_file
, "\n");
3831 address_cost_data_list
[data_index
] = data
;
3834 bits
= GET_MODE_BITSIZE (address_mode
);
3835 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3837 if ((offset
>> (bits
- 1) & 1))
3842 autoinc_type
= AINC_NONE
;
3843 msize
= GET_MODE_SIZE (mem_mode
);
3844 autoinc_offset
= offset
;
3846 autoinc_offset
+= ratio
* cstep
;
3847 if (symbol_present
|| var_present
|| ratio
!= 1)
3851 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3853 autoinc_type
= AINC_POST_INC
;
3854 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3856 autoinc_type
= AINC_POST_DEC
;
3857 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3859 autoinc_type
= AINC_PRE_INC
;
3860 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3862 autoinc_type
= AINC_PRE_DEC
;
3864 if (autoinc_type
!= AINC_NONE
)
3869 offset_p
= (s_offset
!= 0
3870 && data
->min_offset
<= s_offset
3871 && s_offset
<= data
->max_offset
);
3872 ratio_p
= (ratio
!= 1
3873 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3875 if (ratio
!= 1 && !ratio_p
)
3876 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3878 if (s_offset
&& !offset_p
&& !symbol_present
)
3879 cost
+= add_cost (speed
, address_mode
);
3882 *may_autoinc
= autoinc
;
3884 acost
= data
->ainc_costs
[autoinc_type
];
3886 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3887 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3888 return new_cost (cost
+ acost
, complexity
);
3891 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3892 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3893 calculating the operands of EXPR. Returns true if successful, and returns
3894 the cost in COST. */
3897 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3898 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3901 tree op1
= TREE_OPERAND (expr
, 1);
3902 tree cst
= TREE_OPERAND (mult
, 1);
3903 tree multop
= TREE_OPERAND (mult
, 0);
3904 int m
= exact_log2 (int_cst_value (cst
));
3905 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3906 int as_cost
, sa_cost
;
3909 if (!(m
>= 0 && m
< maxm
))
3912 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3914 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3916 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3917 use that in preference to a shift insn followed by an add insn. */
3918 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3919 ? shiftadd_cost (speed
, mode
, m
)
3921 ? shiftsub1_cost (speed
, mode
, m
)
3922 : shiftsub0_cost (speed
, mode
, m
)));
3924 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3925 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3927 STRIP_NOPS (multop
);
3928 if (!is_gimple_val (multop
))
3929 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3935 /* Estimates cost of forcing expression EXPR into a variable. */
3938 force_expr_to_var_cost (tree expr
, bool speed
)
3940 static bool costs_initialized
= false;
3941 static unsigned integer_cost
[2];
3942 static unsigned symbol_cost
[2];
3943 static unsigned address_cost
[2];
3945 comp_cost cost0
, cost1
, cost
;
3948 if (!costs_initialized
)
3950 tree type
= build_pointer_type (integer_type_node
);
3955 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3956 TREE_STATIC (var
) = 1;
3957 x
= produce_memory_decl_rtl (var
, NULL
);
3958 SET_DECL_RTL (var
, x
);
3960 addr
= build1 (ADDR_EXPR
, type
, var
);
3963 for (i
= 0; i
< 2; i
++)
3965 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3968 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3971 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3972 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3974 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3975 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3976 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3977 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3978 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3979 fprintf (dump_file
, "\n");
3983 costs_initialized
= true;
3988 if (SSA_VAR_P (expr
))
3991 if (is_gimple_min_invariant (expr
))
3993 if (TREE_CODE (expr
) == INTEGER_CST
)
3994 return new_cost (integer_cost
[speed
], 0);
3996 if (TREE_CODE (expr
) == ADDR_EXPR
)
3998 tree obj
= TREE_OPERAND (expr
, 0);
4000 if (TREE_CODE (obj
) == VAR_DECL
4001 || TREE_CODE (obj
) == PARM_DECL
4002 || TREE_CODE (obj
) == RESULT_DECL
)
4003 return new_cost (symbol_cost
[speed
], 0);
4006 return new_cost (address_cost
[speed
], 0);
4009 switch (TREE_CODE (expr
))
4011 case POINTER_PLUS_EXPR
:
4015 op0
= TREE_OPERAND (expr
, 0);
4016 op1
= TREE_OPERAND (expr
, 1);
4023 op0
= TREE_OPERAND (expr
, 0);
4029 /* Just an arbitrary value, FIXME. */
4030 return new_cost (target_spill_cost
[speed
], 0);
4033 if (op0
== NULL_TREE
4034 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4037 cost0
= force_expr_to_var_cost (op0
, speed
);
4039 if (op1
== NULL_TREE
4040 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4043 cost1
= force_expr_to_var_cost (op1
, speed
);
4045 mode
= TYPE_MODE (TREE_TYPE (expr
));
4046 switch (TREE_CODE (expr
))
4048 case POINTER_PLUS_EXPR
:
4052 cost
= new_cost (add_cost (speed
, mode
), 0);
4053 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4055 tree mult
= NULL_TREE
;
4057 if (TREE_CODE (op1
) == MULT_EXPR
)
4059 else if (TREE_CODE (op0
) == MULT_EXPR
)
4062 if (mult
!= NULL_TREE
4063 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4064 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4072 tree inner_mode
, outer_mode
;
4073 outer_mode
= TREE_TYPE (expr
);
4074 inner_mode
= TREE_TYPE (op0
);
4075 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4076 TYPE_MODE (inner_mode
), speed
), 0);
4081 if (cst_and_fits_in_hwi (op0
))
4082 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4084 else if (cst_and_fits_in_hwi (op1
))
4085 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4088 return new_cost (target_spill_cost
[speed
], 0);
4095 cost
= add_costs (cost
, cost0
);
4096 cost
= add_costs (cost
, cost1
);
4098 /* Bound the cost by target_spill_cost. The parts of complicated
4099 computations often are either loop invariant or at least can
4100 be shared between several iv uses, so letting this grow without
4101 limits would not give reasonable results. */
4102 if (cost
.cost
> (int) target_spill_cost
[speed
])
4103 cost
.cost
= target_spill_cost
[speed
];
4108 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4109 invariants the computation depends on. */
4112 force_var_cost (struct ivopts_data
*data
,
4113 tree expr
, bitmap
*depends_on
)
4117 fd_ivopts_data
= data
;
4118 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4121 return force_expr_to_var_cost (expr
, data
->speed
);
4124 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4125 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4126 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4127 invariants the computation depends on. */
4130 split_address_cost (struct ivopts_data
*data
,
4131 tree addr
, bool *symbol_present
, bool *var_present
,
4132 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4135 HOST_WIDE_INT bitsize
;
4136 HOST_WIDE_INT bitpos
;
4139 int unsignedp
, volatilep
;
4141 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4142 &unsignedp
, &volatilep
, false);
4145 || bitpos
% BITS_PER_UNIT
!= 0
4146 || TREE_CODE (core
) != VAR_DECL
)
4148 *symbol_present
= false;
4149 *var_present
= true;
4150 fd_ivopts_data
= data
;
4151 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4152 return new_cost (target_spill_cost
[data
->speed
], 0);
4155 *offset
+= bitpos
/ BITS_PER_UNIT
;
4156 if (TREE_STATIC (core
)
4157 || DECL_EXTERNAL (core
))
4159 *symbol_present
= true;
4160 *var_present
= false;
4164 *symbol_present
= false;
4165 *var_present
= true;
4169 /* Estimates cost of expressing difference of addresses E1 - E2 as
4170 var + symbol + offset. The value of offset is added to OFFSET,
4171 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4172 part is missing. DEPENDS_ON is a set of the invariants the computation
4176 ptr_difference_cost (struct ivopts_data
*data
,
4177 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4178 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4180 HOST_WIDE_INT diff
= 0;
4181 aff_tree aff_e1
, aff_e2
;
4184 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4186 if (ptr_difference_const (e1
, e2
, &diff
))
4189 *symbol_present
= false;
4190 *var_present
= false;
4194 if (integer_zerop (e2
))
4195 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4196 symbol_present
, var_present
, offset
, depends_on
);
4198 *symbol_present
= false;
4199 *var_present
= true;
4201 type
= signed_type_for (TREE_TYPE (e1
));
4202 tree_to_aff_combination (e1
, type
, &aff_e1
);
4203 tree_to_aff_combination (e2
, type
, &aff_e2
);
4204 aff_combination_scale (&aff_e2
, -1);
4205 aff_combination_add (&aff_e1
, &aff_e2
);
4207 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4210 /* Estimates cost of expressing difference E1 - E2 as
4211 var + symbol + offset. The value of offset is added to OFFSET,
4212 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4213 part is missing. DEPENDS_ON is a set of the invariants the computation
4217 difference_cost (struct ivopts_data
*data
,
4218 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4219 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4221 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4222 unsigned HOST_WIDE_INT off1
, off2
;
4223 aff_tree aff_e1
, aff_e2
;
4226 e1
= strip_offset (e1
, &off1
);
4227 e2
= strip_offset (e2
, &off2
);
4228 *offset
+= off1
- off2
;
4233 if (TREE_CODE (e1
) == ADDR_EXPR
)
4234 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4235 offset
, depends_on
);
4236 *symbol_present
= false;
4238 if (operand_equal_p (e1
, e2
, 0))
4240 *var_present
= false;
4244 *var_present
= true;
4246 if (integer_zerop (e2
))
4247 return force_var_cost (data
, e1
, depends_on
);
4249 if (integer_zerop (e1
))
4251 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4252 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4256 type
= signed_type_for (TREE_TYPE (e1
));
4257 tree_to_aff_combination (e1
, type
, &aff_e1
);
4258 tree_to_aff_combination (e2
, type
, &aff_e2
);
4259 aff_combination_scale (&aff_e2
, -1);
4260 aff_combination_add (&aff_e1
, &aff_e2
);
4262 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4265 /* Returns true if AFF1 and AFF2 are identical. */
4268 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4272 if (aff1
->n
!= aff2
->n
)
4275 for (i
= 0; i
< aff1
->n
; i
++)
4277 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4280 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4286 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4289 get_expr_id (struct ivopts_data
*data
, tree expr
)
4291 struct iv_inv_expr_ent ent
;
4292 struct iv_inv_expr_ent
**slot
;
4295 ent
.hash
= iterative_hash_expr (expr
, 0);
4296 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4300 *slot
= XNEW (struct iv_inv_expr_ent
);
4301 (*slot
)->expr
= expr
;
4302 (*slot
)->hash
= ent
.hash
;
4303 (*slot
)->id
= data
->inv_expr_id
++;
4307 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4308 requires a new compiler generated temporary. Returns -1 otherwise.
4309 ADDRESS_P is a flag indicating if the expression is for address
4313 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4314 tree cbase
, HOST_WIDE_INT ratio
,
4317 aff_tree ubase_aff
, cbase_aff
;
4325 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4326 && (TREE_CODE (cbase
) == INTEGER_CST
))
4329 /* Strips the constant part. */
4330 if (TREE_CODE (ubase
) == PLUS_EXPR
4331 || TREE_CODE (ubase
) == MINUS_EXPR
4332 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4334 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4335 ubase
= TREE_OPERAND (ubase
, 0);
4338 /* Strips the constant part. */
4339 if (TREE_CODE (cbase
) == PLUS_EXPR
4340 || TREE_CODE (cbase
) == MINUS_EXPR
4341 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4343 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4344 cbase
= TREE_OPERAND (cbase
, 0);
4349 if (((TREE_CODE (ubase
) == SSA_NAME
)
4350 || (TREE_CODE (ubase
) == ADDR_EXPR
4351 && is_gimple_min_invariant (ubase
)))
4352 && (TREE_CODE (cbase
) == INTEGER_CST
))
4355 if (((TREE_CODE (cbase
) == SSA_NAME
)
4356 || (TREE_CODE (cbase
) == ADDR_EXPR
4357 && is_gimple_min_invariant (cbase
)))
4358 && (TREE_CODE (ubase
) == INTEGER_CST
))
4364 if (operand_equal_p (ubase
, cbase
, 0))
4367 if (TREE_CODE (ubase
) == ADDR_EXPR
4368 && TREE_CODE (cbase
) == ADDR_EXPR
)
4372 usym
= TREE_OPERAND (ubase
, 0);
4373 csym
= TREE_OPERAND (cbase
, 0);
4374 if (TREE_CODE (usym
) == ARRAY_REF
)
4376 tree ind
= TREE_OPERAND (usym
, 1);
4377 if (TREE_CODE (ind
) == INTEGER_CST
4378 && tree_fits_shwi_p (ind
)
4379 && tree_to_shwi (ind
) == 0)
4380 usym
= TREE_OPERAND (usym
, 0);
4382 if (TREE_CODE (csym
) == ARRAY_REF
)
4384 tree ind
= TREE_OPERAND (csym
, 1);
4385 if (TREE_CODE (ind
) == INTEGER_CST
4386 && tree_fits_shwi_p (ind
)
4387 && tree_to_shwi (ind
) == 0)
4388 csym
= TREE_OPERAND (csym
, 0);
4390 if (operand_equal_p (usym
, csym
, 0))
4393 /* Now do more complex comparison */
4394 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4395 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4396 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4400 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4401 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4403 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4404 aff_combination_add (&ubase_aff
, &cbase_aff
);
4405 expr
= aff_combination_to_tree (&ubase_aff
);
4406 return get_expr_id (data
, expr
);
4411 /* Determines the cost of the computation by that USE is expressed
4412 from induction variable CAND. If ADDRESS_P is true, we just need
4413 to create an address from it, otherwise we want to get it into
4414 register. A set of invariants we depend on is stored in
4415 DEPENDS_ON. AT is the statement at that the value is computed.
4416 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4417 addressing is likely. */
4420 get_computation_cost_at (struct ivopts_data
*data
,
4421 struct iv_use
*use
, struct iv_cand
*cand
,
4422 bool address_p
, bitmap
*depends_on
, gimple at
,
4426 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4428 tree utype
= TREE_TYPE (ubase
), ctype
;
4429 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4430 HOST_WIDE_INT ratio
, aratio
;
4431 bool var_present
, symbol_present
, stmt_is_after_inc
;
4434 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4435 machine_mode mem_mode
= (address_p
4436 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4441 /* Only consider real candidates. */
4443 return infinite_cost
;
4445 cbase
= cand
->iv
->base
;
4446 cstep
= cand
->iv
->step
;
4447 ctype
= TREE_TYPE (cbase
);
4449 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4451 /* We do not have a precision to express the values of use. */
4452 return infinite_cost
;
4456 || (use
->iv
->base_object
4457 && cand
->iv
->base_object
4458 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4459 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4461 /* Do not try to express address of an object with computation based
4462 on address of a different object. This may cause problems in rtl
4463 level alias analysis (that does not expect this to be happening,
4464 as this is illegal in C), and would be unlikely to be useful
4466 if (use
->iv
->base_object
4467 && cand
->iv
->base_object
4468 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4469 return infinite_cost
;
4472 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4474 /* TODO -- add direct handling of this case. */
4478 /* CSTEPI is removed from the offset in case statement is after the
4479 increment. If the step is not constant, we use zero instead.
4480 This is a bit imprecise (there is the extra addition), but
4481 redundancy elimination is likely to transform the code so that
4482 it uses value of the variable before increment anyway,
4483 so it is not that much unrealistic. */
4484 if (cst_and_fits_in_hwi (cstep
))
4485 cstepi
= int_cst_value (cstep
);
4489 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4490 return infinite_cost
;
4492 if (wi::fits_shwi_p (rat
))
4493 ratio
= rat
.to_shwi ();
4495 return infinite_cost
;
4498 ctype
= TREE_TYPE (cbase
);
4500 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4502 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4503 or ratio == 1, it is better to handle this like
4505 ubase - ratio * cbase + ratio * var
4507 (also holds in the case ratio == -1, TODO. */
4509 if (cst_and_fits_in_hwi (cbase
))
4511 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4512 cost
= difference_cost (data
,
4513 ubase
, build_int_cst (utype
, 0),
4514 &symbol_present
, &var_present
, &offset
,
4516 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4518 else if (ratio
== 1)
4520 tree real_cbase
= cbase
;
4522 /* Check to see if any adjustment is needed. */
4523 if (cstepi
== 0 && stmt_is_after_inc
)
4525 aff_tree real_cbase_aff
;
4528 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4530 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4532 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4533 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4536 cost
= difference_cost (data
,
4538 &symbol_present
, &var_present
, &offset
,
4540 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4543 && !POINTER_TYPE_P (ctype
)
4544 && multiplier_allowed_in_address_p
4546 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4549 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4550 cost
= difference_cost (data
,
4552 &symbol_present
, &var_present
, &offset
,
4554 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4558 cost
= force_var_cost (data
, cbase
, depends_on
);
4559 cost
= add_costs (cost
,
4560 difference_cost (data
,
4561 ubase
, build_int_cst (utype
, 0),
4562 &symbol_present
, &var_present
,
4563 &offset
, depends_on
));
4564 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4565 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4568 /* Set of invariants depended on by sub use has already been computed
4569 for the first use in the group. */
4573 if (depends_on
&& *depends_on
)
4574 bitmap_clear (*depends_on
);
4576 else if (inv_expr_id
)
4579 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4580 /* Clear depends on. */
4581 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4582 bitmap_clear (*depends_on
);
4585 /* If we are after the increment, the value of the candidate is higher by
4587 if (stmt_is_after_inc
)
4588 offset
-= ratio
* cstepi
;
4590 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4591 (symbol/var1/const parts may be omitted). If we are looking for an
4592 address, find the cost of addressing this. */
4594 return add_costs (cost
,
4595 get_address_cost (symbol_present
, var_present
,
4596 offset
, ratio
, cstepi
,
4598 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4599 speed
, stmt_is_after_inc
,
4602 /* Otherwise estimate the costs for computing the expression. */
4603 if (!symbol_present
&& !var_present
&& !offset
)
4606 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4610 /* Symbol + offset should be compile-time computable so consider that they
4611 are added once to the variable, if present. */
4612 if (var_present
&& (symbol_present
|| offset
))
4613 cost
.cost
+= adjust_setup_cost (data
,
4614 add_cost (speed
, TYPE_MODE (ctype
)));
4616 /* Having offset does not affect runtime cost in case it is added to
4617 symbol, but it increases complexity. */
4621 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4623 aratio
= ratio
> 0 ? ratio
: -ratio
;
4625 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4630 *can_autoinc
= false;
4633 /* Just get the expression, expand it and measure the cost. */
4634 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4637 return infinite_cost
;
4640 comp
= build_simple_mem_ref (comp
);
4642 return new_cost (computation_cost (comp
, speed
), 0);
4646 /* Determines the cost of the computation by that USE is expressed
4647 from induction variable CAND. If ADDRESS_P is true, we just need
4648 to create an address from it, otherwise we want to get it into
4649 register. A set of invariants we depend on is stored in
4650 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4651 autoinc addressing is likely. */
4654 get_computation_cost (struct ivopts_data
*data
,
4655 struct iv_use
*use
, struct iv_cand
*cand
,
4656 bool address_p
, bitmap
*depends_on
,
4657 bool *can_autoinc
, int *inv_expr_id
)
4659 return get_computation_cost_at (data
,
4660 use
, cand
, address_p
, depends_on
, use
->stmt
,
4661 can_autoinc
, inv_expr_id
);
4664 /* Determines cost of basing replacement of USE on CAND in a generic
4668 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4669 struct iv_use
*use
, struct iv_cand
*cand
)
4673 int inv_expr_id
= -1;
4675 /* The simple case first -- if we need to express value of the preserved
4676 original biv, the cost is 0. This also prevents us from counting the
4677 cost of increment twice -- once at this use and once in the cost of
4679 if (cand
->pos
== IP_ORIGINAL
4680 && cand
->incremented_at
== use
->stmt
)
4682 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4687 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4688 NULL
, &inv_expr_id
);
4690 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4693 return !infinite_cost_p (cost
);
4696 /* Determines cost of basing replacement of USE on CAND in an address. */
4699 determine_use_iv_cost_address (struct ivopts_data
*data
,
4700 struct iv_use
*use
, struct iv_cand
*cand
)
4704 int inv_expr_id
= -1;
4705 struct iv_use
*sub_use
;
4707 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4708 &can_autoinc
, &inv_expr_id
);
4710 if (cand
->ainc_use
== use
)
4713 cost
.cost
-= cand
->cost_step
;
4714 /* If we generated the candidate solely for exploiting autoincrement
4715 opportunities, and it turns out it can't be used, set the cost to
4716 infinity to make sure we ignore it. */
4717 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4718 cost
= infinite_cost
;
4720 for (sub_use
= use
->next
;
4721 sub_use
&& !infinite_cost_p (cost
);
4722 sub_use
= sub_use
->next
)
4724 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4725 &can_autoinc
, &inv_expr_id
);
4726 cost
= add_costs (cost
, sub_cost
);
4729 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4732 return !infinite_cost_p (cost
);
4735 /* Computes value of candidate CAND at position AT in iteration NITER, and
4736 stores it to VAL. */
4739 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4742 aff_tree step
, delta
, nit
;
4743 struct iv
*iv
= cand
->iv
;
4744 tree type
= TREE_TYPE (iv
->base
);
4745 tree steptype
= type
;
4746 if (POINTER_TYPE_P (type
))
4747 steptype
= sizetype
;
4748 steptype
= unsigned_type_for (type
);
4750 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4751 aff_combination_convert (&step
, steptype
);
4752 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4753 aff_combination_convert (&nit
, steptype
);
4754 aff_combination_mult (&nit
, &step
, &delta
);
4755 if (stmt_after_increment (loop
, cand
, at
))
4756 aff_combination_add (&delta
, &step
);
4758 tree_to_aff_combination (iv
->base
, type
, val
);
4759 if (!POINTER_TYPE_P (type
))
4760 aff_combination_convert (val
, steptype
);
4761 aff_combination_add (val
, &delta
);
4764 /* Returns period of induction variable iv. */
4767 iv_period (struct iv
*iv
)
4769 tree step
= iv
->step
, period
, type
;
4772 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4774 type
= unsigned_type_for (TREE_TYPE (step
));
4775 /* Period of the iv is lcm (step, type_range)/step -1,
4776 i.e., N*type_range/step - 1. Since type range is power
4777 of two, N == (step >> num_of_ending_zeros_binary (step),
4778 so the final result is
4780 (type_range >> num_of_ending_zeros_binary (step)) - 1
4783 pow2div
= num_ending_zeros (step
);
4785 period
= build_low_bits_mask (type
,
4786 (TYPE_PRECISION (type
)
4787 - tree_to_uhwi (pow2div
)));
4792 /* Returns the comparison operator used when eliminating the iv USE. */
4794 static enum tree_code
4795 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4797 struct loop
*loop
= data
->current_loop
;
4801 ex_bb
= gimple_bb (use
->stmt
);
4802 exit
= EDGE_SUCC (ex_bb
, 0);
4803 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4804 exit
= EDGE_SUCC (ex_bb
, 1);
4806 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4809 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4810 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4811 calculation is performed in non-wrapping type.
4813 TODO: More generally, we could test for the situation that
4814 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4815 This would require knowing the sign of OFFSET. */
4818 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4820 enum tree_code code
;
4822 aff_tree aff_e1
, aff_e2
, aff_offset
;
4824 if (!nowrap_type_p (TREE_TYPE (base
)))
4827 base
= expand_simple_operations (base
);
4829 if (TREE_CODE (base
) == SSA_NAME
)
4831 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4833 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4836 code
= gimple_assign_rhs_code (stmt
);
4837 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4840 e1
= gimple_assign_rhs1 (stmt
);
4841 e2
= gimple_assign_rhs2 (stmt
);
4845 code
= TREE_CODE (base
);
4846 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4848 e1
= TREE_OPERAND (base
, 0);
4849 e2
= TREE_OPERAND (base
, 1);
4852 /* Use affine expansion as deeper inspection to prove the equality. */
4853 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4854 &aff_e2
, &data
->name_expansion_cache
);
4855 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4856 &aff_offset
, &data
->name_expansion_cache
);
4857 aff_combination_scale (&aff_offset
, -1);
4861 aff_combination_add (&aff_e2
, &aff_offset
);
4862 if (aff_combination_zero_p (&aff_e2
))
4865 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4866 &aff_e1
, &data
->name_expansion_cache
);
4867 aff_combination_add (&aff_e1
, &aff_offset
);
4868 return aff_combination_zero_p (&aff_e1
);
4870 case POINTER_PLUS_EXPR
:
4871 aff_combination_add (&aff_e2
, &aff_offset
);
4872 return aff_combination_zero_p (&aff_e2
);
4879 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4880 comparison with CAND. NITER describes the number of iterations of
4881 the loops. If successful, the comparison in COMP_P is altered accordingly.
4883 We aim to handle the following situation:
4899 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4900 We aim to optimize this to
4908 while (p < p_0 - a + b);
4910 This preserves the correctness, since the pointer arithmetics does not
4911 overflow. More precisely:
4913 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4914 overflow in computing it or the values of p.
4915 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4916 overflow. To prove this, we use the fact that p_0 = base + a. */
4919 iv_elimination_compare_lt (struct ivopts_data
*data
,
4920 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4921 struct tree_niter_desc
*niter
)
4923 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4924 struct aff_tree nit
, tmpa
, tmpb
;
4925 enum tree_code comp
;
4928 /* We need to know that the candidate induction variable does not overflow.
4929 While more complex analysis may be used to prove this, for now just
4930 check that the variable appears in the original program and that it
4931 is computed in a type that guarantees no overflows. */
4932 cand_type
= TREE_TYPE (cand
->iv
->base
);
4933 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4936 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4937 the calculation of the BOUND could overflow, making the comparison
4939 if (!data
->loop_single_exit_p
)
4942 /* We need to be able to decide whether candidate is increasing or decreasing
4943 in order to choose the right comparison operator. */
4944 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4946 step
= int_cst_value (cand
->iv
->step
);
4948 /* Check that the number of iterations matches the expected pattern:
4949 a + 1 > b ? 0 : b - a - 1. */
4950 mbz
= niter
->may_be_zero
;
4951 if (TREE_CODE (mbz
) == GT_EXPR
)
4953 /* Handle a + 1 > b. */
4954 tree op0
= TREE_OPERAND (mbz
, 0);
4955 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4957 a
= TREE_OPERAND (op0
, 0);
4958 b
= TREE_OPERAND (mbz
, 1);
4963 else if (TREE_CODE (mbz
) == LT_EXPR
)
4965 tree op1
= TREE_OPERAND (mbz
, 1);
4967 /* Handle b < a + 1. */
4968 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4970 a
= TREE_OPERAND (op1
, 0);
4971 b
= TREE_OPERAND (mbz
, 0);
4979 /* Expected number of iterations is B - A - 1. Check that it matches
4980 the actual number, i.e., that B - A - NITER = 1. */
4981 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4982 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4983 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4984 aff_combination_scale (&nit
, -1);
4985 aff_combination_scale (&tmpa
, -1);
4986 aff_combination_add (&tmpb
, &tmpa
);
4987 aff_combination_add (&tmpb
, &nit
);
4988 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4991 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4993 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4995 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4996 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4999 /* Determine the new comparison operator. */
5000 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5001 if (*comp_p
== NE_EXPR
)
5003 else if (*comp_p
== EQ_EXPR
)
5004 *comp_p
= invert_tree_comparison (comp
, false);
5011 /* Check whether it is possible to express the condition in USE by comparison
5012 of candidate CAND. If so, store the value compared with to BOUND, and the
5013 comparison operator to COMP. */
5016 may_eliminate_iv (struct ivopts_data
*data
,
5017 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5018 enum tree_code
*comp
)
5023 struct loop
*loop
= data
->current_loop
;
5025 struct tree_niter_desc
*desc
= NULL
;
5027 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5030 /* For now works only for exits that dominate the loop latch.
5031 TODO: extend to other conditions inside loop body. */
5032 ex_bb
= gimple_bb (use
->stmt
);
5033 if (use
->stmt
!= last_stmt (ex_bb
)
5034 || gimple_code (use
->stmt
) != GIMPLE_COND
5035 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5038 exit
= EDGE_SUCC (ex_bb
, 0);
5039 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5040 exit
= EDGE_SUCC (ex_bb
, 1);
5041 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5044 desc
= niter_for_exit (data
, exit
);
5048 /* Determine whether we can use the variable to test the exit condition.
5049 This is the case iff the period of the induction variable is greater
5050 than the number of iterations for which the exit condition is true. */
5051 period
= iv_period (cand
->iv
);
5053 /* If the number of iterations is constant, compare against it directly. */
5054 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5056 /* See cand_value_at. */
5057 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5059 if (!tree_int_cst_lt (desc
->niter
, period
))
5064 if (tree_int_cst_lt (period
, desc
->niter
))
5069 /* If not, and if this is the only possible exit of the loop, see whether
5070 we can get a conservative estimate on the number of iterations of the
5071 entire loop and compare against that instead. */
5074 widest_int period_value
, max_niter
;
5076 max_niter
= desc
->max
;
5077 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5079 period_value
= wi::to_widest (period
);
5080 if (wi::gtu_p (max_niter
, period_value
))
5082 /* See if we can take advantage of inferred loop bound information. */
5083 if (data
->loop_single_exit_p
)
5085 if (!max_loop_iterations (loop
, &max_niter
))
5087 /* The loop bound is already adjusted by adding 1. */
5088 if (wi::gtu_p (max_niter
, period_value
))
5096 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5098 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5099 aff_combination_to_tree (&bnd
));
5100 *comp
= iv_elimination_compare (data
, use
);
5102 /* It is unlikely that computing the number of iterations using division
5103 would be more profitable than keeping the original induction variable. */
5104 if (expression_expensive_p (*bound
))
5107 /* Sometimes, it is possible to handle the situation that the number of
5108 iterations may be zero unless additional assumtions by using <
5109 instead of != in the exit condition.
5111 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5112 base the exit condition on it. However, that is often too
5114 if (!integer_zerop (desc
->may_be_zero
))
5115 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5120 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5121 be copied, if is is used in the loop body and DATA->body_includes_call. */
5124 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5126 tree sbound
= bound
;
5127 STRIP_NOPS (sbound
);
5129 if (TREE_CODE (sbound
) == SSA_NAME
5130 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5131 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5132 && data
->body_includes_call
)
5133 return COSTS_N_INSNS (1);
5138 /* Determines cost of basing replacement of USE on CAND in a condition. */
5141 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5142 struct iv_use
*use
, struct iv_cand
*cand
)
5144 tree bound
= NULL_TREE
;
5146 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5147 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5149 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5150 tree
*control_var
, *bound_cst
;
5151 enum tree_code comp
= ERROR_MARK
;
5153 /* Only consider real candidates. */
5156 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5161 /* Try iv elimination. */
5162 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5164 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5165 if (elim_cost
.cost
== 0)
5166 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5167 else if (TREE_CODE (bound
) == INTEGER_CST
)
5169 /* If we replace a loop condition 'i < n' with 'p < base + n',
5170 depends_on_elim will have 'base' and 'n' set, which implies
5171 that both 'base' and 'n' will be live during the loop. More likely,
5172 'base + n' will be loop invariant, resulting in only one live value
5173 during the loop. So in that case we clear depends_on_elim and set
5174 elim_inv_expr_id instead. */
5175 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5177 elim_inv_expr_id
= get_expr_id (data
, bound
);
5178 bitmap_clear (depends_on_elim
);
5180 /* The bound is a loop invariant, so it will be only computed
5182 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5185 elim_cost
= infinite_cost
;
5187 /* Try expressing the original giv. If it is compared with an invariant,
5188 note that we cannot get rid of it. */
5189 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5193 /* When the condition is a comparison of the candidate IV against
5194 zero, prefer this IV.
5196 TODO: The constant that we're subtracting from the cost should
5197 be target-dependent. This information should be added to the
5198 target costs for each backend. */
5199 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5200 && integer_zerop (*bound_cst
)
5201 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5202 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5203 elim_cost
.cost
-= 1;
5205 express_cost
= get_computation_cost (data
, use
, cand
, false,
5206 &depends_on_express
, NULL
,
5207 &express_inv_expr_id
);
5208 fd_ivopts_data
= data
;
5209 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5211 /* Count the cost of the original bound as well. */
5212 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5213 if (bound_cost
.cost
== 0)
5214 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5215 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5216 bound_cost
.cost
= 0;
5217 express_cost
.cost
+= bound_cost
.cost
;
5219 /* Choose the better approach, preferring the eliminated IV. */
5220 if (compare_costs (elim_cost
, express_cost
) <= 0)
5223 depends_on
= depends_on_elim
;
5224 depends_on_elim
= NULL
;
5225 inv_expr_id
= elim_inv_expr_id
;
5229 cost
= express_cost
;
5230 depends_on
= depends_on_express
;
5231 depends_on_express
= NULL
;
5234 inv_expr_id
= express_inv_expr_id
;
5237 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5239 if (depends_on_elim
)
5240 BITMAP_FREE (depends_on_elim
);
5241 if (depends_on_express
)
5242 BITMAP_FREE (depends_on_express
);
5244 return !infinite_cost_p (cost
);
5247 /* Determines cost of basing replacement of USE on CAND. Returns false
5248 if USE cannot be based on CAND. */
5251 determine_use_iv_cost (struct ivopts_data
*data
,
5252 struct iv_use
*use
, struct iv_cand
*cand
)
5256 case USE_NONLINEAR_EXPR
:
5257 return determine_use_iv_cost_generic (data
, use
, cand
);
5260 return determine_use_iv_cost_address (data
, use
, cand
);
5263 return determine_use_iv_cost_condition (data
, use
, cand
);
5270 /* Return true if get_computation_cost indicates that autoincrement is
5271 a possibility for the pair of USE and CAND, false otherwise. */
5274 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5275 struct iv_cand
*cand
)
5281 if (use
->type
!= USE_ADDRESS
)
5284 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5285 &can_autoinc
, NULL
);
5287 BITMAP_FREE (depends_on
);
5289 return !infinite_cost_p (cost
) && can_autoinc
;
5292 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5293 use that allows autoincrement, and set their AINC_USE if possible. */
5296 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5300 for (i
= 0; i
< n_iv_cands (data
); i
++)
5302 struct iv_cand
*cand
= iv_cand (data
, i
);
5303 struct iv_use
*closest_before
= NULL
;
5304 struct iv_use
*closest_after
= NULL
;
5305 if (cand
->pos
!= IP_ORIGINAL
)
5308 for (j
= 0; j
< n_iv_uses (data
); j
++)
5310 struct iv_use
*use
= iv_use (data
, j
);
5311 unsigned uid
= gimple_uid (use
->stmt
);
5313 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5316 if (uid
< gimple_uid (cand
->incremented_at
)
5317 && (closest_before
== NULL
5318 || uid
> gimple_uid (closest_before
->stmt
)))
5319 closest_before
= use
;
5321 if (uid
> gimple_uid (cand
->incremented_at
)
5322 && (closest_after
== NULL
5323 || uid
< gimple_uid (closest_after
->stmt
)))
5324 closest_after
= use
;
5327 if (closest_before
!= NULL
5328 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5329 cand
->ainc_use
= closest_before
;
5330 else if (closest_after
!= NULL
5331 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5332 cand
->ainc_use
= closest_after
;
5336 /* Finds the candidates for the induction variables. */
5339 find_iv_candidates (struct ivopts_data
*data
)
5341 /* Add commonly used ivs. */
5342 add_standard_iv_candidates (data
);
5344 /* Add old induction variables. */
5345 add_old_ivs_candidates (data
);
5347 /* Add induction variables derived from uses. */
5348 add_derived_ivs_candidates (data
);
5350 set_autoinc_for_original_candidates (data
);
5352 /* Record the important candidates. */
5353 record_important_candidates (data
);
5356 /* Determines costs of basing the use of the iv on an iv candidate. */
5359 determine_use_iv_costs (struct ivopts_data
*data
)
5363 struct iv_cand
*cand
;
5364 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5366 alloc_use_cost_map (data
);
5368 for (i
= 0; i
< n_iv_uses (data
); i
++)
5370 use
= iv_use (data
, i
);
5372 if (data
->consider_all_candidates
)
5374 for (j
= 0; j
< n_iv_cands (data
); j
++)
5376 cand
= iv_cand (data
, j
);
5377 determine_use_iv_cost (data
, use
, cand
);
5384 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5386 cand
= iv_cand (data
, j
);
5387 if (!determine_use_iv_cost (data
, use
, cand
))
5388 bitmap_set_bit (to_clear
, j
);
5391 /* Remove the candidates for that the cost is infinite from
5392 the list of related candidates. */
5393 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5394 bitmap_clear (to_clear
);
5398 BITMAP_FREE (to_clear
);
5400 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5402 fprintf (dump_file
, "Use-candidate costs:\n");
5404 for (i
= 0; i
< n_iv_uses (data
); i
++)
5406 use
= iv_use (data
, i
);
5408 fprintf (dump_file
, "Use %d:\n", i
);
5409 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5410 for (j
= 0; j
< use
->n_map_members
; j
++)
5412 if (!use
->cost_map
[j
].cand
5413 || infinite_cost_p (use
->cost_map
[j
].cost
))
5416 fprintf (dump_file
, " %d\t%d\t%d\t",
5417 use
->cost_map
[j
].cand
->id
,
5418 use
->cost_map
[j
].cost
.cost
,
5419 use
->cost_map
[j
].cost
.complexity
);
5420 if (use
->cost_map
[j
].depends_on
)
5421 bitmap_print (dump_file
,
5422 use
->cost_map
[j
].depends_on
, "","");
5423 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5424 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5425 fprintf (dump_file
, "\n");
5428 fprintf (dump_file
, "\n");
5430 fprintf (dump_file
, "\n");
5434 /* Determines cost of the candidate CAND. */
5437 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5439 comp_cost cost_base
;
5440 unsigned cost
, cost_step
;
5449 /* There are two costs associated with the candidate -- its increment
5450 and its initialization. The second is almost negligible for any loop
5451 that rolls enough, so we take it just very little into account. */
5453 base
= cand
->iv
->base
;
5454 cost_base
= force_var_cost (data
, base
, NULL
);
5455 /* It will be exceptional that the iv register happens to be initialized with
5456 the proper value at no cost. In general, there will at least be a regcopy
5458 if (cost_base
.cost
== 0)
5459 cost_base
.cost
= COSTS_N_INSNS (1);
5460 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5462 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5464 /* Prefer the original ivs unless we may gain something by replacing it.
5465 The reason is to make debugging simpler; so this is not relevant for
5466 artificial ivs created by other optimization passes. */
5467 if (cand
->pos
!= IP_ORIGINAL
5468 || !SSA_NAME_VAR (cand
->var_before
)
5469 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5472 /* Prefer not to insert statements into latch unless there are some
5473 already (so that we do not create unnecessary jumps). */
5474 if (cand
->pos
== IP_END
5475 && empty_block_p (ip_end_pos (data
->current_loop
)))
5479 cand
->cost_step
= cost_step
;
5482 /* Determines costs of computation of the candidates. */
5485 determine_iv_costs (struct ivopts_data
*data
)
5489 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5491 fprintf (dump_file
, "Candidate costs:\n");
5492 fprintf (dump_file
, " cand\tcost\n");
5495 for (i
= 0; i
< n_iv_cands (data
); i
++)
5497 struct iv_cand
*cand
= iv_cand (data
, i
);
5499 determine_iv_cost (data
, cand
);
5501 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5502 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5506 fprintf (dump_file
, "\n");
5509 /* Calculates cost for having SIZE induction variables. */
5512 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5514 /* We add size to the cost, so that we prefer eliminating ivs
5516 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5517 data
->body_includes_call
);
5520 /* For each size of the induction variable set determine the penalty. */
5523 determine_set_costs (struct ivopts_data
*data
)
5529 struct loop
*loop
= data
->current_loop
;
5532 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5534 fprintf (dump_file
, "Global costs:\n");
5535 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5536 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5537 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5538 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5542 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5545 op
= PHI_RESULT (phi
);
5547 if (virtual_operand_p (op
))
5550 if (get_iv (data
, op
))
5556 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5558 struct version_info
*info
= ver_info (data
, j
);
5560 if (info
->inv_id
&& info
->has_nonlin_use
)
5564 data
->regs_used
= n
;
5565 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5566 fprintf (dump_file
, " regs_used %d\n", n
);
5568 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5570 fprintf (dump_file
, " cost for size:\n");
5571 fprintf (dump_file
, " ivs\tcost\n");
5572 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5573 fprintf (dump_file
, " %d\t%d\n", j
,
5574 ivopts_global_cost_for_size (data
, j
));
5575 fprintf (dump_file
, "\n");
5579 /* Returns true if A is a cheaper cost pair than B. */
5582 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5592 cmp
= compare_costs (a
->cost
, b
->cost
);
5599 /* In case the costs are the same, prefer the cheaper candidate. */
5600 if (a
->cand
->cost
< b
->cand
->cost
)
5607 /* Returns candidate by that USE is expressed in IVS. */
5609 static struct cost_pair
*
5610 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5612 return ivs
->cand_for_use
[use
->id
];
5615 /* Computes the cost field of IVS structure. */
5618 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5620 comp_cost cost
= ivs
->cand_use_cost
;
5622 cost
.cost
+= ivs
->cand_cost
;
5624 cost
.cost
+= ivopts_global_cost_for_size (data
,
5625 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5630 /* Remove invariants in set INVS to set IVS. */
5633 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5641 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5643 ivs
->n_invariant_uses
[iid
]--;
5644 if (ivs
->n_invariant_uses
[iid
] == 0)
5649 /* Set USE not to be expressed by any candidate in IVS. */
5652 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5655 unsigned uid
= use
->id
, cid
;
5656 struct cost_pair
*cp
;
5658 cp
= ivs
->cand_for_use
[uid
];
5664 ivs
->cand_for_use
[uid
] = NULL
;
5665 ivs
->n_cand_uses
[cid
]--;
5667 if (ivs
->n_cand_uses
[cid
] == 0)
5669 bitmap_clear_bit (ivs
->cands
, cid
);
5670 /* Do not count the pseudocandidates. */
5674 ivs
->cand_cost
-= cp
->cand
->cost
;
5676 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5679 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5681 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5683 if (cp
->inv_expr_id
!= -1)
5685 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5686 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5687 ivs
->num_used_inv_expr
--;
5689 iv_ca_recount_cost (data
, ivs
);
5692 /* Add invariants in set INVS to set IVS. */
5695 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5703 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5705 ivs
->n_invariant_uses
[iid
]++;
5706 if (ivs
->n_invariant_uses
[iid
] == 1)
5711 /* Set cost pair for USE in set IVS to CP. */
5714 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5715 struct iv_use
*use
, struct cost_pair
*cp
)
5717 unsigned uid
= use
->id
, cid
;
5719 if (ivs
->cand_for_use
[uid
] == cp
)
5722 if (ivs
->cand_for_use
[uid
])
5723 iv_ca_set_no_cp (data
, ivs
, use
);
5730 ivs
->cand_for_use
[uid
] = cp
;
5731 ivs
->n_cand_uses
[cid
]++;
5732 if (ivs
->n_cand_uses
[cid
] == 1)
5734 bitmap_set_bit (ivs
->cands
, cid
);
5735 /* Do not count the pseudocandidates. */
5739 ivs
->cand_cost
+= cp
->cand
->cost
;
5741 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5744 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5745 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5747 if (cp
->inv_expr_id
!= -1)
5749 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5750 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5751 ivs
->num_used_inv_expr
++;
5753 iv_ca_recount_cost (data
, ivs
);
5757 /* Extend set IVS by expressing USE by some of the candidates in it
5758 if possible. Consider all important candidates if candidates in
5759 set IVS don't give any result. */
5762 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5765 struct cost_pair
*best_cp
= NULL
, *cp
;
5768 struct iv_cand
*cand
;
5770 gcc_assert (ivs
->upto
>= use
->id
);
5774 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5776 cand
= iv_cand (data
, i
);
5777 cp
= get_use_iv_cost (data
, use
, cand
);
5778 if (cheaper_cost_pair (cp
, best_cp
))
5782 if (best_cp
== NULL
)
5784 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5786 cand
= iv_cand (data
, i
);
5787 cp
= get_use_iv_cost (data
, use
, cand
);
5788 if (cheaper_cost_pair (cp
, best_cp
))
5793 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5796 /* Get cost for assignment IVS. */
5799 iv_ca_cost (struct iv_ca
*ivs
)
5801 /* This was a conditional expression but it triggered a bug in
5804 return infinite_cost
;
5809 /* Returns true if all dependences of CP are among invariants in IVS. */
5812 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5817 if (!cp
->depends_on
)
5820 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5822 if (ivs
->n_invariant_uses
[i
] == 0)
5829 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5830 it before NEXT_CHANGE. */
5832 static struct iv_ca_delta
*
5833 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5834 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5836 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5839 change
->old_cp
= old_cp
;
5840 change
->new_cp
= new_cp
;
5841 change
->next_change
= next_change
;
5846 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5849 static struct iv_ca_delta
*
5850 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5852 struct iv_ca_delta
*last
;
5860 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5862 last
->next_change
= l2
;
5867 /* Reverse the list of changes DELTA, forming the inverse to it. */
5869 static struct iv_ca_delta
*
5870 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5872 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5874 for (act
= delta
; act
; act
= next
)
5876 next
= act
->next_change
;
5877 act
->next_change
= prev
;
5880 std::swap (act
->old_cp
, act
->new_cp
);
5886 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5887 reverted instead. */
5890 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5891 struct iv_ca_delta
*delta
, bool forward
)
5893 struct cost_pair
*from
, *to
;
5894 struct iv_ca_delta
*act
;
5897 delta
= iv_ca_delta_reverse (delta
);
5899 for (act
= delta
; act
; act
= act
->next_change
)
5903 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5904 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5908 iv_ca_delta_reverse (delta
);
5911 /* Returns true if CAND is used in IVS. */
5914 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5916 return ivs
->n_cand_uses
[cand
->id
] > 0;
5919 /* Returns number of induction variable candidates in the set IVS. */
5922 iv_ca_n_cands (struct iv_ca
*ivs
)
5924 return ivs
->n_cands
;
5927 /* Free the list of changes DELTA. */
5930 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5932 struct iv_ca_delta
*act
, *next
;
5934 for (act
= *delta
; act
; act
= next
)
5936 next
= act
->next_change
;
5943 /* Allocates new iv candidates assignment. */
5945 static struct iv_ca
*
5946 iv_ca_new (struct ivopts_data
*data
)
5948 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5952 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5953 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5954 nw
->cands
= BITMAP_ALLOC (NULL
);
5957 nw
->cand_use_cost
= no_cost
;
5959 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5961 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5962 nw
->num_used_inv_expr
= 0;
5967 /* Free memory occupied by the set IVS. */
5970 iv_ca_free (struct iv_ca
**ivs
)
5972 free ((*ivs
)->cand_for_use
);
5973 free ((*ivs
)->n_cand_uses
);
5974 BITMAP_FREE ((*ivs
)->cands
);
5975 free ((*ivs
)->n_invariant_uses
);
5976 free ((*ivs
)->used_inv_expr
);
5981 /* Dumps IVS to FILE. */
5984 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5986 const char *pref
= " invariants ";
5988 comp_cost cost
= iv_ca_cost (ivs
);
5990 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5991 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5992 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5993 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5995 for (i
= 0; i
< ivs
->upto
; i
++)
5997 struct iv_use
*use
= iv_use (data
, i
);
5998 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6000 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6001 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6003 fprintf (file
, " use:%d --> ??\n", use
->id
);
6006 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6007 if (ivs
->n_invariant_uses
[i
])
6009 fprintf (file
, "%s%d", pref
, i
);
6012 fprintf (file
, "\n\n");
6015 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6016 new set, and store differences in DELTA. Number of induction variables
6017 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6018 the function will try to find a solution with minimal iv candidates. */
6021 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6022 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6023 unsigned *n_ivs
, bool min_ncand
)
6028 struct cost_pair
*old_cp
, *new_cp
;
6031 for (i
= 0; i
< ivs
->upto
; i
++)
6033 use
= iv_use (data
, i
);
6034 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6037 && old_cp
->cand
== cand
)
6040 new_cp
= get_use_iv_cost (data
, use
, cand
);
6044 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6047 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6050 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6053 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6054 cost
= iv_ca_cost (ivs
);
6056 *n_ivs
= iv_ca_n_cands (ivs
);
6057 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6062 /* Try narrowing set IVS by removing CAND. Return the cost of
6063 the new set and store the differences in DELTA. START is
6064 the candidate with which we start narrowing. */
6067 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6068 struct iv_cand
*cand
, struct iv_cand
*start
,
6069 struct iv_ca_delta
**delta
)
6073 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6075 struct iv_cand
*cnd
;
6076 comp_cost cost
, best_cost
, acost
;
6079 for (i
= 0; i
< n_iv_uses (data
); i
++)
6081 use
= iv_use (data
, i
);
6083 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6084 if (old_cp
->cand
!= cand
)
6087 best_cost
= iv_ca_cost (ivs
);
6088 /* Start narrowing with START. */
6089 new_cp
= get_use_iv_cost (data
, use
, start
);
6091 if (data
->consider_all_candidates
)
6093 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6095 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6098 cnd
= iv_cand (data
, ci
);
6100 cp
= get_use_iv_cost (data
, use
, cnd
);
6104 iv_ca_set_cp (data
, ivs
, use
, cp
);
6105 acost
= iv_ca_cost (ivs
);
6107 if (compare_costs (acost
, best_cost
) < 0)
6116 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6118 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6121 cnd
= iv_cand (data
, ci
);
6123 cp
= get_use_iv_cost (data
, use
, cnd
);
6127 iv_ca_set_cp (data
, ivs
, use
, cp
);
6128 acost
= iv_ca_cost (ivs
);
6130 if (compare_costs (acost
, best_cost
) < 0)
6137 /* Restore to old cp for use. */
6138 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6142 iv_ca_delta_free (delta
);
6143 return infinite_cost
;
6146 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6149 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6150 cost
= iv_ca_cost (ivs
);
6151 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6156 /* Try optimizing the set of candidates IVS by removing candidates different
6157 from to EXCEPT_CAND from it. Return cost of the new set, and store
6158 differences in DELTA. */
6161 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6162 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6165 struct iv_ca_delta
*act_delta
, *best_delta
;
6167 comp_cost best_cost
, acost
;
6168 struct iv_cand
*cand
;
6171 best_cost
= iv_ca_cost (ivs
);
6173 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6175 cand
= iv_cand (data
, i
);
6177 if (cand
== except_cand
)
6180 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6182 if (compare_costs (acost
, best_cost
) < 0)
6185 iv_ca_delta_free (&best_delta
);
6186 best_delta
= act_delta
;
6189 iv_ca_delta_free (&act_delta
);
6198 /* Recurse to possibly remove other unnecessary ivs. */
6199 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6200 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6201 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6202 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6206 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6207 cheaper local cost for USE than BEST_CP. Return pointer to
6208 the corresponding cost_pair, otherwise just return BEST_CP. */
6210 static struct cost_pair
*
6211 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6212 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6213 struct cost_pair
*best_cp
)
6215 struct iv_cand
*cand
;
6216 struct cost_pair
*cp
;
6218 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6219 if (cand_idx
== old_cand
->id
)
6222 cand
= iv_cand (data
, cand_idx
);
6223 cp
= get_use_iv_cost (data
, use
, cand
);
6224 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6230 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6231 which are used by more than one iv uses. For each of those candidates,
6232 this function tries to represent iv uses under that candidate using
6233 other ones with lower local cost, then tries to prune the new set.
6234 If the new set has lower cost, It returns the new cost after recording
6235 candidate replacement in list DELTA. */
6238 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6239 struct iv_ca_delta
**delta
)
6241 bitmap_iterator bi
, bj
;
6242 unsigned int i
, j
, k
;
6244 struct iv_cand
*cand
;
6245 comp_cost orig_cost
, acost
;
6246 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6247 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6250 orig_cost
= iv_ca_cost (ivs
);
6252 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6254 if (ivs
->n_cand_uses
[i
] == 1
6255 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6258 cand
= iv_cand (data
, i
);
6261 /* Represent uses under current candidate using other ones with
6262 lower local cost. */
6263 for (j
= 0; j
< ivs
->upto
; j
++)
6265 use
= iv_use (data
, j
);
6266 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6268 if (old_cp
->cand
!= cand
)
6272 if (data
->consider_all_candidates
)
6273 for (k
= 0; k
< n_iv_cands (data
); k
++)
6274 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6275 old_cp
->cand
, best_cp
);
6277 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6278 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6279 old_cp
->cand
, best_cp
);
6281 if (best_cp
== old_cp
)
6284 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6286 /* No need for further prune. */
6290 /* Prune the new candidate set. */
6291 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6292 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6293 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6294 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6296 if (compare_costs (acost
, orig_cost
) < 0)
6302 iv_ca_delta_free (&act_delta
);
6308 /* Tries to extend the sets IVS in the best possible way in order
6309 to express the USE. If ORIGINALP is true, prefer candidates from
6310 the original set of IVs, otherwise favor important candidates not
6311 based on any memory object. */
6314 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6315 struct iv_use
*use
, bool originalp
)
6317 comp_cost best_cost
, act_cost
;
6320 struct iv_cand
*cand
;
6321 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6322 struct cost_pair
*cp
;
6324 iv_ca_add_use (data
, ivs
, use
);
6325 best_cost
= iv_ca_cost (ivs
);
6326 cp
= iv_ca_cand_for_use (ivs
, use
);
6329 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6330 iv_ca_set_no_cp (data
, ivs
, use
);
6333 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6334 first try important candidates not based on any memory object. Only if
6335 this fails, try the specific ones. Rationale -- in loops with many
6336 variables the best choice often is to use just one generic biv. If we
6337 added here many ivs specific to the uses, the optimization algorithm later
6338 would be likely to get stuck in a local minimum, thus causing us to create
6339 too many ivs. The approach from few ivs to more seems more likely to be
6340 successful -- starting from few ivs, replacing an expensive use by a
6341 specific iv should always be a win. */
6342 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6344 cand
= iv_cand (data
, i
);
6346 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6349 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6352 if (iv_ca_cand_used_p (ivs
, cand
))
6355 cp
= get_use_iv_cost (data
, use
, cand
);
6359 iv_ca_set_cp (data
, ivs
, use
, cp
);
6360 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6362 iv_ca_set_no_cp (data
, ivs
, use
);
6363 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6365 if (compare_costs (act_cost
, best_cost
) < 0)
6367 best_cost
= act_cost
;
6369 iv_ca_delta_free (&best_delta
);
6370 best_delta
= act_delta
;
6373 iv_ca_delta_free (&act_delta
);
6376 if (infinite_cost_p (best_cost
))
6378 for (i
= 0; i
< use
->n_map_members
; i
++)
6380 cp
= use
->cost_map
+ i
;
6385 /* Already tried this. */
6386 if (cand
->important
)
6388 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6390 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6394 if (iv_ca_cand_used_p (ivs
, cand
))
6398 iv_ca_set_cp (data
, ivs
, use
, cp
);
6399 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6400 iv_ca_set_no_cp (data
, ivs
, use
);
6401 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6404 if (compare_costs (act_cost
, best_cost
) < 0)
6406 best_cost
= act_cost
;
6409 iv_ca_delta_free (&best_delta
);
6410 best_delta
= act_delta
;
6413 iv_ca_delta_free (&act_delta
);
6417 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6418 iv_ca_delta_free (&best_delta
);
6420 return !infinite_cost_p (best_cost
);
6423 /* Finds an initial assignment of candidates to uses. */
6425 static struct iv_ca
*
6426 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6428 struct iv_ca
*ivs
= iv_ca_new (data
);
6431 for (i
= 0; i
< n_iv_uses (data
); i
++)
6432 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6441 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6442 points to a bool variable, this function tries to break local
6443 optimal fixed-point by replacing candidates in IVS if it's true. */
6446 try_improve_iv_set (struct ivopts_data
*data
,
6447 struct iv_ca
*ivs
, bool *try_replace_p
)
6450 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6451 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6452 struct iv_cand
*cand
;
6454 /* Try extending the set of induction variables by one. */
6455 for (i
= 0; i
< n_iv_cands (data
); i
++)
6457 cand
= iv_cand (data
, i
);
6459 if (iv_ca_cand_used_p (ivs
, cand
))
6462 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6466 /* If we successfully added the candidate and the set is small enough,
6467 try optimizing it by removing other candidates. */
6468 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6470 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6471 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6472 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6473 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6476 if (compare_costs (acost
, best_cost
) < 0)
6479 iv_ca_delta_free (&best_delta
);
6480 best_delta
= act_delta
;
6483 iv_ca_delta_free (&act_delta
);
6488 /* Try removing the candidates from the set instead. */
6489 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6491 if (!best_delta
&& *try_replace_p
)
6493 *try_replace_p
= false;
6494 /* So far candidate selecting algorithm tends to choose fewer IVs
6495 so that it can handle cases in which loops have many variables
6496 but the best choice is often to use only one general biv. One
6497 weakness is it can't handle opposite cases, in which different
6498 candidates should be chosen with respect to each use. To solve
6499 the problem, we replace candidates in a manner described by the
6500 comments of iv_ca_replace, thus give general algorithm a chance
6501 to break local optimal fixed-point in these cases. */
6502 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6509 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6510 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6511 iv_ca_delta_free (&best_delta
);
6515 /* Attempts to find the optimal set of induction variables. We do simple
6516 greedy heuristic -- we try to replace at most one candidate in the selected
6517 solution and remove the unused ivs while this improves the cost. */
6519 static struct iv_ca
*
6520 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6523 bool try_replace_p
= true;
6525 /* Get the initial solution. */
6526 set
= get_initial_solution (data
, originalp
);
6529 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6530 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6534 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6536 fprintf (dump_file
, "Initial set of candidates:\n");
6537 iv_ca_dump (data
, dump_file
, set
);
6540 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6542 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6544 fprintf (dump_file
, "Improved to:\n");
6545 iv_ca_dump (data
, dump_file
, set
);
6552 static struct iv_ca
*
6553 find_optimal_iv_set (struct ivopts_data
*data
)
6556 struct iv_ca
*set
, *origset
;
6558 comp_cost cost
, origcost
;
6560 /* Determine the cost based on a strategy that starts with original IVs,
6561 and try again using a strategy that prefers candidates not based
6563 origset
= find_optimal_iv_set_1 (data
, true);
6564 set
= find_optimal_iv_set_1 (data
, false);
6566 if (!origset
&& !set
)
6569 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6570 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6572 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6574 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6575 origcost
.cost
, origcost
.complexity
);
6576 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6577 cost
.cost
, cost
.complexity
);
6580 /* Choose the one with the best cost. */
6581 if (compare_costs (origcost
, cost
) <= 0)
6588 iv_ca_free (&origset
);
6590 for (i
= 0; i
< n_iv_uses (data
); i
++)
6592 use
= iv_use (data
, i
);
6593 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6599 /* Creates a new induction variable corresponding to CAND. */
6602 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6604 gimple_stmt_iterator incr_pos
;
6614 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6618 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6626 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6630 /* Mark that the iv is preserved. */
6631 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6632 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6634 /* Rewrite the increment so that it uses var_before directly. */
6635 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6639 gimple_add_tmp_var (cand
->var_before
);
6641 base
= unshare_expr (cand
->iv
->base
);
6643 create_iv (base
, unshare_expr (cand
->iv
->step
),
6644 cand
->var_before
, data
->current_loop
,
6645 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6648 /* Creates new induction variables described in SET. */
6651 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6654 struct iv_cand
*cand
;
6657 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6659 cand
= iv_cand (data
, i
);
6660 create_new_iv (data
, cand
);
6663 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6665 fprintf (dump_file
, "Selected IV set for loop %d",
6666 data
->current_loop
->num
);
6667 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6668 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6669 LOCATION_LINE (data
->loop_loc
));
6670 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6671 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6673 cand
= iv_cand (data
, i
);
6674 dump_cand (dump_file
, cand
);
6676 fprintf (dump_file
, "\n");
6680 /* Rewrites USE (definition of iv used in a nonlinear expression)
6681 using candidate CAND. */
6684 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6685 struct iv_use
*use
, struct iv_cand
*cand
)
6690 gimple_stmt_iterator bsi
;
6692 /* An important special case -- if we are asked to express value of
6693 the original iv by itself, just exit; there is no need to
6694 introduce a new computation (that might also need casting the
6695 variable to unsigned and back). */
6696 if (cand
->pos
== IP_ORIGINAL
6697 && cand
->incremented_at
== use
->stmt
)
6699 enum tree_code stmt_code
;
6701 gcc_assert (is_gimple_assign (use
->stmt
));
6702 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6704 /* Check whether we may leave the computation unchanged.
6705 This is the case only if it does not rely on other
6706 computations in the loop -- otherwise, the computation
6707 we rely upon may be removed in remove_unused_ivs,
6708 thus leading to ICE. */
6709 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6710 if (stmt_code
== PLUS_EXPR
6711 || stmt_code
== MINUS_EXPR
6712 || stmt_code
== POINTER_PLUS_EXPR
)
6714 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6715 op
= gimple_assign_rhs2 (use
->stmt
);
6716 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6717 op
= gimple_assign_rhs1 (use
->stmt
);
6724 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6728 comp
= get_computation (data
->current_loop
, use
, cand
);
6729 gcc_assert (comp
!= NULL_TREE
);
6731 switch (gimple_code (use
->stmt
))
6734 tgt
= PHI_RESULT (use
->stmt
);
6736 /* If we should keep the biv, do not replace it. */
6737 if (name_info (data
, tgt
)->preserve_biv
)
6740 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6744 tgt
= gimple_assign_lhs (use
->stmt
);
6745 bsi
= gsi_for_stmt (use
->stmt
);
6752 if (!valid_gimple_rhs_p (comp
)
6753 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6754 /* We can't allow re-allocating the stmt as it might be pointed
6756 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6757 >= gimple_num_ops (gsi_stmt (bsi
)))))
6759 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6760 true, GSI_SAME_STMT
);
6761 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6763 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6764 /* As this isn't a plain copy we have to reset alignment
6766 if (SSA_NAME_PTR_INFO (comp
))
6767 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6771 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6773 ass
= gimple_build_assign (tgt
, comp
);
6774 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6776 bsi
= gsi_for_stmt (use
->stmt
);
6777 remove_phi_node (&bsi
, false);
6781 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6782 use
->stmt
= gsi_stmt (bsi
);
6786 /* Performs a peephole optimization to reorder the iv update statement with
6787 a mem ref to enable instruction combining in later phases. The mem ref uses
6788 the iv value before the update, so the reordering transformation requires
6789 adjustment of the offset. CAND is the selected IV_CAND.
6793 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6801 directly propagating t over to (1) will introduce overlapping live range
6802 thus increase register pressure. This peephole transform it into:
6806 t = MEM_REF (base, iv2, 8, 8);
6813 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6816 gimple iv_update
, stmt
;
6818 gimple_stmt_iterator gsi
, gsi_iv
;
6820 if (cand
->pos
!= IP_NORMAL
)
6823 var_after
= cand
->var_after
;
6824 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6826 bb
= gimple_bb (iv_update
);
6827 gsi
= gsi_last_nondebug_bb (bb
);
6828 stmt
= gsi_stmt (gsi
);
6830 /* Only handle conditional statement for now. */
6831 if (gimple_code (stmt
) != GIMPLE_COND
)
6834 gsi_prev_nondebug (&gsi
);
6835 stmt
= gsi_stmt (gsi
);
6836 if (stmt
!= iv_update
)
6839 gsi_prev_nondebug (&gsi
);
6840 if (gsi_end_p (gsi
))
6843 stmt
= gsi_stmt (gsi
);
6844 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6847 if (stmt
!= use
->stmt
)
6850 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6853 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6855 fprintf (dump_file
, "Reordering \n");
6856 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6857 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6858 fprintf (dump_file
, "\n");
6861 gsi
= gsi_for_stmt (use
->stmt
);
6862 gsi_iv
= gsi_for_stmt (iv_update
);
6863 gsi_move_before (&gsi_iv
, &gsi
);
6865 cand
->pos
= IP_BEFORE_USE
;
6866 cand
->incremented_at
= use
->stmt
;
6869 /* Rewrites USE (address that is an iv) using candidate CAND. */
6872 rewrite_use_address_1 (struct ivopts_data
*data
,
6873 struct iv_use
*use
, struct iv_cand
*cand
)
6876 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6877 tree base_hint
= NULL_TREE
;
6881 adjust_iv_update_pos (cand
, use
);
6882 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6884 unshare_aff_combination (&aff
);
6886 /* To avoid undefined overflow problems, all IV candidates use unsigned
6887 integer types. The drawback is that this makes it impossible for
6888 create_mem_ref to distinguish an IV that is based on a memory object
6889 from one that represents simply an offset.
6891 To work around this problem, we pass a hint to create_mem_ref that
6892 indicates which variable (if any) in aff is an IV based on a memory
6893 object. Note that we only consider the candidate. If this is not
6894 based on an object, the base of the reference is in some subexpression
6895 of the use -- but these will use pointer types, so they are recognized
6896 by the create_mem_ref heuristics anyway. */
6897 if (cand
->iv
->base_object
)
6898 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6900 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6901 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6902 reference_alias_ptr_type (*use
->op_p
),
6903 iv
, base_hint
, data
->speed
);
6904 copy_ref_info (ref
, *use
->op_p
);
6908 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6909 first use of a group, rewrites sub uses in the group too. */
6912 rewrite_use_address (struct ivopts_data
*data
,
6913 struct iv_use
*use
, struct iv_cand
*cand
)
6915 struct iv_use
*next
;
6917 gcc_assert (use
->sub_id
== 0);
6918 rewrite_use_address_1 (data
, use
, cand
);
6919 update_stmt (use
->stmt
);
6921 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
6923 rewrite_use_address_1 (data
, next
, cand
);
6924 update_stmt (next
->stmt
);
6930 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6934 rewrite_use_compare (struct ivopts_data
*data
,
6935 struct iv_use
*use
, struct iv_cand
*cand
)
6937 tree comp
, *var_p
, op
, bound
;
6938 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6939 enum tree_code compare
;
6940 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6946 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6947 tree var_type
= TREE_TYPE (var
);
6950 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6952 fprintf (dump_file
, "Replacing exit test: ");
6953 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6956 bound
= unshare_expr (fold_convert (var_type
, bound
));
6957 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6959 gsi_insert_seq_on_edge_immediate (
6960 loop_preheader_edge (data
->current_loop
),
6963 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6964 gimple_cond_set_lhs (cond_stmt
, var
);
6965 gimple_cond_set_code (cond_stmt
, compare
);
6966 gimple_cond_set_rhs (cond_stmt
, op
);
6970 /* The induction variable elimination failed; just express the original
6972 comp
= get_computation (data
->current_loop
, use
, cand
);
6973 gcc_assert (comp
!= NULL_TREE
);
6975 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6978 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6979 true, GSI_SAME_STMT
);
6982 /* Rewrites USE using candidate CAND. */
6985 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6989 case USE_NONLINEAR_EXPR
:
6990 rewrite_use_nonlinear_expr (data
, use
, cand
);
6994 rewrite_use_address (data
, use
, cand
);
6998 rewrite_use_compare (data
, use
, cand
);
7005 update_stmt (use
->stmt
);
7008 /* Rewrite the uses using the selected induction variables. */
7011 rewrite_uses (struct ivopts_data
*data
)
7014 struct iv_cand
*cand
;
7017 for (i
= 0; i
< n_iv_uses (data
); i
++)
7019 use
= iv_use (data
, i
);
7020 cand
= use
->selected
;
7023 rewrite_use (data
, use
, cand
);
7027 /* Removes the ivs that are not used after rewriting. */
7030 remove_unused_ivs (struct ivopts_data
*data
)
7034 bitmap toremove
= BITMAP_ALLOC (NULL
);
7036 /* Figure out an order in which to release SSA DEFs so that we don't
7037 release something that we'd have to propagate into a debug stmt
7039 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7041 struct version_info
*info
;
7043 info
= ver_info (data
, j
);
7045 && !integer_zerop (info
->iv
->step
)
7047 && !info
->iv
->have_use_for
7048 && !info
->preserve_biv
)
7050 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7052 tree def
= info
->iv
->ssa_name
;
7054 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7056 imm_use_iterator imm_iter
;
7057 use_operand_p use_p
;
7061 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7063 if (!gimple_debug_bind_p (stmt
))
7066 /* We just want to determine whether to do nothing
7067 (count == 0), to substitute the computed
7068 expression into a single use of the SSA DEF by
7069 itself (count == 1), or to use a debug temp
7070 because the SSA DEF is used multiple times or as
7071 part of a larger expression (count > 1). */
7073 if (gimple_debug_bind_get_value (stmt
) != def
)
7077 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7083 struct iv_use dummy_use
;
7084 struct iv_cand
*best_cand
= NULL
, *cand
;
7085 unsigned i
, best_pref
= 0, cand_pref
;
7087 memset (&dummy_use
, 0, sizeof (dummy_use
));
7088 dummy_use
.iv
= info
->iv
;
7089 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7091 cand
= iv_use (data
, i
)->selected
;
7092 if (cand
== best_cand
)
7094 cand_pref
= operand_equal_p (cand
->iv
->step
,
7098 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7099 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7102 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7104 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7107 best_pref
= cand_pref
;
7114 tree comp
= get_computation_at (data
->current_loop
,
7115 &dummy_use
, best_cand
,
7116 SSA_NAME_DEF_STMT (def
));
7122 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7123 DECL_ARTIFICIAL (vexpr
) = 1;
7124 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7125 if (SSA_NAME_VAR (def
))
7126 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7128 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7130 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7131 gimple_stmt_iterator gsi
;
7133 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7134 gsi
= gsi_after_labels (gimple_bb
7135 (SSA_NAME_DEF_STMT (def
)));
7137 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7139 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7143 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7145 if (!gimple_debug_bind_p (stmt
))
7148 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7149 SET_USE (use_p
, comp
);
7157 release_defs_bitset (toremove
);
7159 BITMAP_FREE (toremove
);
7162 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7163 for hash_map::traverse. */
7166 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7172 /* Frees data allocated by the optimization of a single loop. */
7175 free_loop_data (struct ivopts_data
*data
)
7183 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7184 delete data
->niters
;
7185 data
->niters
= NULL
;
7188 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7190 struct version_info
*info
;
7192 info
= ver_info (data
, i
);
7195 info
->has_nonlin_use
= false;
7196 info
->preserve_biv
= false;
7199 bitmap_clear (data
->relevant
);
7200 bitmap_clear (data
->important_candidates
);
7202 for (i
= 0; i
< n_iv_uses (data
); i
++)
7204 struct iv_use
*use
= iv_use (data
, i
);
7205 struct iv_use
*pre
= use
, *sub
= use
->next
;
7209 gcc_assert (sub
->related_cands
== NULL
);
7210 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7219 BITMAP_FREE (use
->related_cands
);
7220 for (j
= 0; j
< use
->n_map_members
; j
++)
7221 if (use
->cost_map
[j
].depends_on
)
7222 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7223 free (use
->cost_map
);
7226 data
->iv_uses
.truncate (0);
7228 for (i
= 0; i
< n_iv_cands (data
); i
++)
7230 struct iv_cand
*cand
= iv_cand (data
, i
);
7233 if (cand
->depends_on
)
7234 BITMAP_FREE (cand
->depends_on
);
7237 data
->iv_candidates
.truncate (0);
7239 if (data
->version_info_size
< num_ssa_names
)
7241 data
->version_info_size
= 2 * num_ssa_names
;
7242 free (data
->version_info
);
7243 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7246 data
->max_inv_id
= 0;
7248 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7249 SET_DECL_RTL (obj
, NULL_RTX
);
7251 decl_rtl_to_reset
.truncate (0);
7253 data
->inv_expr_tab
->empty ();
7254 data
->inv_expr_id
= 0;
7257 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7261 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7263 free_loop_data (data
);
7264 free (data
->version_info
);
7265 BITMAP_FREE (data
->relevant
);
7266 BITMAP_FREE (data
->important_candidates
);
7268 decl_rtl_to_reset
.release ();
7269 data
->iv_uses
.release ();
7270 data
->iv_candidates
.release ();
7271 delete data
->inv_expr_tab
;
7272 data
->inv_expr_tab
= NULL
;
7273 free_affine_expand_cache (&data
->name_expansion_cache
);
7276 /* Returns true if the loop body BODY includes any function calls. */
7279 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7281 gimple_stmt_iterator gsi
;
7284 for (i
= 0; i
< num_nodes
; i
++)
7285 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7287 gimple stmt
= gsi_stmt (gsi
);
7288 if (is_gimple_call (stmt
)
7289 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7295 /* Optimizes the LOOP. Returns true if anything changed. */
7298 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7300 bool changed
= false;
7301 struct iv_ca
*iv_ca
;
7302 edge exit
= single_dom_exit (loop
);
7305 gcc_assert (!data
->niters
);
7306 data
->current_loop
= loop
;
7307 data
->loop_loc
= find_loop_location (loop
);
7308 data
->speed
= optimize_loop_for_speed_p (loop
);
7310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7312 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7313 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7314 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7315 LOCATION_LINE (data
->loop_loc
));
7316 fprintf (dump_file
, "\n");
7320 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7321 exit
->src
->index
, exit
->dest
->index
);
7322 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7323 fprintf (dump_file
, "\n");
7326 fprintf (dump_file
, "\n");
7329 body
= get_loop_body (loop
);
7330 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7331 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7334 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7336 /* For each ssa name determines whether it behaves as an induction variable
7338 if (!find_induction_variables (data
))
7341 /* Finds interesting uses (item 1). */
7342 find_interesting_uses (data
);
7343 group_address_uses (data
);
7344 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7347 /* Finds candidates for the induction variables (item 2). */
7348 find_iv_candidates (data
);
7350 /* Calculates the costs (item 3, part 1). */
7351 determine_iv_costs (data
);
7352 determine_use_iv_costs (data
);
7353 determine_set_costs (data
);
7355 /* Find the optimal set of induction variables (item 3, part 2). */
7356 iv_ca
= find_optimal_iv_set (data
);
7361 /* Create the new induction variables (item 4, part 1). */
7362 create_new_ivs (data
, iv_ca
);
7363 iv_ca_free (&iv_ca
);
7365 /* Rewrite the uses (item 4, part 2). */
7366 rewrite_uses (data
);
7368 /* Remove the ivs that are unused after rewriting. */
7369 remove_unused_ivs (data
);
7371 /* We have changed the structure of induction variables; it might happen
7372 that definitions in the scev database refer to some of them that were
7377 free_loop_data (data
);
7382 /* Main entry point. Optimizes induction variables in loops. */
7385 tree_ssa_iv_optimize (void)
7388 struct ivopts_data data
;
7390 tree_ssa_iv_optimize_init (&data
);
7392 /* Optimize the loops starting with the innermost ones. */
7393 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7395 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7396 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7398 tree_ssa_iv_optimize_loop (&data
, loop
);
7401 tree_ssa_iv_optimize_finalize (&data
);