1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting uses" above
39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "double-int.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
82 #include "hard-reg-set.h"
84 #include "dominance.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
93 #include "gimple-expr.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
115 #include "statistics.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
123 #include "emit-rtl.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
148 /* The infinite cost.  Both fields of infinite_cost are initialized from
   this value.  */
149 #define INFTY 10000000
/* Fixed iteration-count estimate.  The expansion is the literal 5 and does
   not reference its LOOP argument.  avg_loop_niter is the user of this
   macro visible in this file.  */
151 #define AVG_LOOP_NITER(LOOP) 5
153 /* Returns the expected number of loop iterations for LOOP.
154 The average trip count is computed from profile data if it
157 static inline HOST_WIDE_INT
158 avg_loop_niter (struct loop
*loop
)
160 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
162 return AVG_LOOP_NITER (loop
);
167 /* Representation of the induction variable. */
170 tree base
; /* Initial value of the iv. */
171 tree base_object
; /* A memory object to which the induction variable points. */
172 tree step
; /* Step of the iv (constant only). */
173 tree ssa_name
; /* The ssa name with the value. */
174 bool biv_p
; /* Is it a biv? */
175 bool have_use_for
; /* Do we already have a use for it? */
176 unsigned use_id
; /* The identifier in the use if it is the case. */
179 /* Per-ssa version information (induction variable descriptions, etc.). */
182 tree name
; /* The ssa name. */
183 struct iv
*iv
; /* Induction variable description. */
184 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
185 an expression that is not an induction variable. */
186 bool preserve_biv
; /* For the original biv, whether to preserve it. */
187 unsigned inv_id
; /* Id of an invariant. */
193 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
194 USE_ADDRESS
, /* Use in an address. */
195 USE_COMPARE
/* Use is a compare. */
198 /* Cost of a computation. */
201 int cost
; /* The runtime cost. */
202 unsigned complexity
; /* The estimate of the complexity of the code for
203 the computation (in no concrete units --
204 complexity field should be larger for more
205 complex expressions and addressing modes). */
208 static const comp_cost no_cost
= {0, 0};
209 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
211 /* The candidate - cost pair. */
214 struct iv_cand
*cand
; /* The candidate. */
215 comp_cost cost
; /* The cost. */
216 bitmap depends_on
; /* The list of invariants that have to be
218 tree value
; /* For final value elimination, the expression for
219 the final value of the iv. For iv elimination,
220 the new bound to compare with. */
221 enum tree_code comp
; /* For iv elimination, the comparison. */
222 int inv_expr_id
; /* Loop invariant expression id. */
228 unsigned id
; /* The id of the use. */
229 enum use_type type
; /* Type of the use. */
230 struct iv
*iv
; /* The induction variable it is based on. */
231 gimple stmt
; /* Statement in which it occurs. */
232 tree
*op_p
; /* The place where it occurs. */
233 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
236 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
237 struct cost_pair
*cost_map
;
238 /* The costs w.r.t. the iv candidates. */
240 struct iv_cand
*selected
;
241 /* The selected candidate. */
244 /* The position where the iv is computed. */
247 IP_NORMAL
, /* At the end, just before the exit condition. */
248 IP_END
, /* At the end of the latch block. */
249 IP_BEFORE_USE
, /* Immediately before a specific use. */
250 IP_AFTER_USE
, /* Immediately after a specific use. */
251 IP_ORIGINAL
/* The original biv. */
254 /* The induction variable candidate. */
257 unsigned id
; /* The number of the candidate. */
258 bool important
; /* Whether this is an "important" candidate, i.e. such
259 that it should be considered by all uses. */
260 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
261 gimple incremented_at
;/* For original biv, the statement where it is
263 tree var_before
; /* The variable used for it before increment. */
264 tree var_after
; /* The variable used for it after increment. */
265 struct iv
*iv
; /* The value of the candidate. NULL for
266 "pseudocandidate" used to indicate the possibility
267 to replace the final value of an iv by direct
268 computation of the value. */
269 unsigned cost
; /* Cost of the candidate. */
270 unsigned cost_step
; /* Cost of the candidate's increment operation. */
271 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
272 where it is incremented. */
273 bitmap depends_on
; /* The list of invariants that are used in step of the
277 /* Loop invariant expression hashtable entry. */
278 struct iv_inv_expr_ent
285 /* The data used by the induction variable optimizations. */
287 typedef struct iv_use
*iv_use_p
;
289 typedef struct iv_cand
*iv_cand_p
;
291 /* Hashtable helpers. */
293 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
295 typedef iv_inv_expr_ent value_type
;
296 typedef iv_inv_expr_ent compare_type
;
297 static inline hashval_t
hash (const value_type
*);
298 static inline bool equal (const value_type
*, const compare_type
*);
301 /* Hash function for loop invariant expressions. */
304 iv_inv_expr_hasher::hash (const value_type
*expr
)
309 /* Hash table equality function for expressions. */
312 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
314 return expr1
->hash
== expr2
->hash
315 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
320 /* The currently optimized loop. */
321 struct loop
*current_loop
;
322 source_location loop_loc
;
324 /* Numbers of iterations for all exits of the current loop. */
325 hash_map
<edge
, tree_niter_desc
*> *niters
;
327 /* Number of registers used in it. */
330 /* The size of version_info array allocated. */
331 unsigned version_info_size
;
333 /* The array of information for the ssa names. */
334 struct version_info
*version_info
;
336 /* The hashtable of loop invariant expressions created
338 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
340 /* Loop invariant expression id. */
343 /* The bitmap of indices in version_info whose value was changed. */
346 /* The uses of induction variables. */
347 vec
<iv_use_p
> iv_uses
;
349 /* The candidates. */
350 vec
<iv_cand_p
> iv_candidates
;
352 /* A bitmap of important candidates. */
353 bitmap important_candidates
;
355 /* Cache used by tree_to_aff_combination_expand. */
356 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
358 /* The maximum invariant id. */
361 /* Whether to consider just related and important candidates when replacing a
363 bool consider_all_candidates
;
365 /* Are we optimizing for speed? */
368 /* Whether the loop body includes any function calls. */
369 bool body_includes_call
;
371 /* Whether the loop body can only be exited via single exit. */
372 bool loop_single_exit_p
;
375 /* An assignment of iv candidates to uses. */
379 /* The number of uses covered by the assignment. */
382 /* Number of uses that cannot be expressed by the candidates in the set. */
385 /* Candidate assigned to a use, together with the related costs. */
386 struct cost_pair
**cand_for_use
;
388 /* Number of times each candidate is used. */
389 unsigned *n_cand_uses
;
391 /* The candidates used. */
394 /* The number of candidates in the set. */
397 /* Total number of registers needed. */
400 /* Total cost of expressing uses. */
401 comp_cost cand_use_cost
;
403 /* Total cost of candidates. */
406 /* Number of times each invariant is used. */
407 unsigned *n_invariant_uses
;
409 /* The array holding the number of uses of each loop
410 invariant expressions created by ivopt. */
411 unsigned *used_inv_expr
;
413 /* The number of created loop invariants. */
414 unsigned num_used_inv_expr
;
416 /* Total cost of the assignment. */
420 /* Difference of two iv candidate assignments. */
427 /* An old assignment (for rollback purposes). */
428 struct cost_pair
*old_cp
;
430 /* A new assignment. */
431 struct cost_pair
*new_cp
;
433 /* Next change in the list. */
434 struct iv_ca_delta
*next_change
;
437 /* Bound on number of candidates below which all candidates are considered.
   Like the two macros that follow, this reads a parameter through
   PARAM_VALUE and casts the result to unsigned.  */
439 #define CONSIDER_ALL_CANDIDATES_BOUND \
440 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
442 /* If there are more iv occurrences, we just give up (it is quite unlikely that
443 optimizing such a loop would help, and it would take ages).  The limit is
   the value of PARAM_IV_MAX_CONSIDERED_USES.  */
445 #define MAX_CONSIDERED_USES \
446 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
448 /* If there are at most this number of ivs in the set, always try removing
449 unnecessary ivs from the set.  The limit is the value of
   PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND.  */
451 #define ALWAYS_PRUNE_CAND_SET_BOUND \
452 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
454 /* The list of trees for which the decl_rtl field must be reset is stored here. */
457 static vec
<tree
> decl_rtl_to_reset
;
459 static comp_cost
force_expr_to_var_cost (tree
, bool);
461 /* Number of uses recorded in DATA, i.e. the current length of the
   DATA->iv_uses vector.  record_use takes the id of a new use from this
   count and then pushes the use onto the same vector.  */
463 static inline unsigned
464 n_iv_uses (struct ivopts_data
*data
)
466 return data
->iv_uses
.length ();
469 /* Ith use recorded in DATA: the element of DATA->iv_uses at index I.
   This function performs no range check of its own; callers in this file
   iterate I below n_iv_uses (DATA) or pass a stored use id.
   NOTE(review): whether the vector's operator[] checks bounds is not
   visible here -- confirm.  */
471 static inline struct iv_use
*
472 iv_use (struct ivopts_data
*data
, unsigned i
)
474 return data
->iv_uses
[i
];
477 /* Number of candidates recorded in DATA, i.e. the current length of the
   DATA->iv_candidates vector.  */
479 static inline unsigned
480 n_iv_cands (struct ivopts_data
*data
)
482 return data
->iv_candidates
.length ();
485 /* Ith candidate recorded in DATA: the element of DATA->iv_candidates at
   index I.  This function performs no range check of its own.
   NOTE(review): whether the vector's operator[] checks bounds is not
   visible here -- confirm.  */
487 static inline struct iv_cand
*
488 iv_cand (struct ivopts_data
*data
, unsigned i
)
490 return data
->iv_candidates
[i
];
493 /* The single loop exit if it dominates the latch, NULL otherwise. */
496 single_dom_exit (struct loop
*loop
)
498 edge exit
= single_exit (loop
);
503 if (!just_once_each_iteration_p (loop
, exit
->src
))
509 /* Dumps information about the induction variable IV to FILE. */
512 dump_iv (FILE *file
, struct iv
*iv
)
516 fprintf (file
, "ssa name ");
517 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
518 fprintf (file
, "\n");
521 fprintf (file
, " type ");
522 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
523 fprintf (file
, "\n");
527 fprintf (file
, " base ");
528 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
529 fprintf (file
, "\n");
531 fprintf (file
, " step ");
532 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
533 fprintf (file
, "\n");
537 fprintf (file
, " invariant ");
538 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
539 fprintf (file
, "\n");
544 fprintf (file
, " base object ");
545 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
546 fprintf (file
, "\n");
550 fprintf (file
, " is a biv\n");
553 /* Dumps information about the USE to FILE. */
556 dump_use (FILE *file
, struct iv_use
*use
)
558 fprintf (file
, "use %d\n", use
->id
);
562 case USE_NONLINEAR_EXPR
:
563 fprintf (file
, " generic\n");
567 fprintf (file
, " address\n");
571 fprintf (file
, " compare\n");
578 fprintf (file
, " in statement ");
579 print_gimple_stmt (file
, use
->stmt
, 0, 0);
580 fprintf (file
, "\n");
582 fprintf (file
, " at position ");
584 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
585 fprintf (file
, "\n");
587 dump_iv (file
, use
->iv
);
589 if (use
->related_cands
)
591 fprintf (file
, " related candidates ");
592 dump_bitmap (file
, use
->related_cands
);
596 /* Dumps information about the uses to FILE.  Visits every index below
   n_iv_uses (DATA) in increasing order.  */
599 dump_uses (FILE *file
, struct ivopts_data
*data
)
604 for (i
= 0; i
< n_iv_uses (data
); i
++)
/* Fetch the use by index, print it with dump_use, then write a newline
   to FILE after it.  */
606 use
= iv_use (data
, i
);
608 dump_use (file
, use
);
609 fprintf (file
, "\n");
613 /* Dumps information about induction variable candidate CAND to FILE. */
616 dump_cand (FILE *file
, struct iv_cand
*cand
)
618 struct iv
*iv
= cand
->iv
;
620 fprintf (file
, "candidate %d%s\n",
621 cand
->id
, cand
->important
? " (important)" : "");
623 if (cand
->depends_on
)
625 fprintf (file
, " depends on ");
626 dump_bitmap (file
, cand
->depends_on
);
631 fprintf (file
, " final value replacement\n");
635 if (cand
->var_before
)
637 fprintf (file
, " var_before ");
638 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
639 fprintf (file
, "\n");
643 fprintf (file
, " var_after ");
644 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
645 fprintf (file
, "\n");
651 fprintf (file
, " incremented before exit test\n");
655 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
659 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
663 fprintf (file
, " incremented at end\n");
667 fprintf (file
, " original biv\n");
674 /* Returns the info for ssa version VER: a pointer VER elements past the
   start of the DATA->version_info array.  VER is not compared against
   DATA->version_info_size here.  tree_ssa_iv_optimize_init allocates the
   array with 2 * num_ssa_names entries.  */
676 static inline struct version_info
*
677 ver_info (struct ivopts_data
*data
, unsigned ver
)
679 return data
->version_info
+ ver
;
682 /* Returns the info for ssa name NAME.  This is ver_info applied to
   SSA_NAME_VERSION (NAME), so NAME has to be something that macro accepts.  */
684 static inline struct version_info
*
685 name_info (struct ivopts_data
*data
, tree name
)
687 return ver_info (data
, SSA_NAME_VERSION (name
));
690 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
694 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
696 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
700 if (sbb
== loop
->latch
)
706 return stmt
== last_stmt (bb
);
709 /* Returns true if STMT is after the place where the original induction
710 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
711 if the positions are identical. */
714 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
716 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
717 basic_block stmt_bb
= gimple_bb (stmt
);
719 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
722 if (stmt_bb
!= cand_bb
)
726 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
728 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
731 /* Returns true if STMT is after the place where the induction variable
732 CAND is incremented in LOOP. */
735 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
743 return stmt_after_ip_normal_pos (loop
, stmt
);
747 return stmt_after_inc_pos (cand
, stmt
, false);
750 return stmt_after_inc_pos (cand
, stmt
, true);
757 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
760 abnormal_ssa_name_p (tree exp
)
765 if (TREE_CODE (exp
) != SSA_NAME
)
768 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
771 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
772 abnormal phi node. Callback for for_each_index. */
775 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
776 void *data ATTRIBUTE_UNUSED
)
778 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
780 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
782 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
786 return !abnormal_ssa_name_p (*index
);
789 /* Returns true if EXPR contains a ssa name that occurs in an
790 abnormal phi node. */
793 contains_abnormal_ssa_name_p (tree expr
)
796 enum tree_code_class codeclass
;
801 code
= TREE_CODE (expr
);
802 codeclass
= TREE_CODE_CLASS (code
);
804 if (code
== SSA_NAME
)
805 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
807 if (code
== INTEGER_CST
808 || is_gimple_min_invariant (expr
))
811 if (code
== ADDR_EXPR
)
812 return !for_each_index (&TREE_OPERAND (expr
, 0),
813 idx_contains_abnormal_ssa_name_p
,
816 if (code
== COND_EXPR
)
817 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
818 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
825 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
830 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
842 /* Returns the structure describing number of iterations determined from
843 EXIT of DATA->current_loop, or NULL if something goes wrong. */
845 static struct tree_niter_desc
*
846 niter_for_exit (struct ivopts_data
*data
, edge exit
)
848 struct tree_niter_desc
*desc
;
849 tree_niter_desc
**slot
;
853 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
857 slot
= data
->niters
->get (exit
);
861 /* Try to determine number of iterations. We cannot safely work with ssa
862 names that appear in phi nodes on abnormal edges, so that we do not
863 create overlapping life ranges for them (PR 27283). */
864 desc
= XNEW (struct tree_niter_desc
);
865 if (!number_of_iterations_exit (data
->current_loop
,
867 || contains_abnormal_ssa_name_p (desc
->niter
))
872 data
->niters
->put (exit
, desc
);
880 /* Returns the structure describing number of iterations determined from
881 single dominating exit of DATA->current_loop, or NULL if something
884 static struct tree_niter_desc
*
885 niter_for_single_dom_exit (struct ivopts_data
*data
)
887 edge exit
= single_dom_exit (data
->current_loop
);
892 return niter_for_exit (data
, exit
);
895 /* Initializes data structures used by the iv optimization pass, stored
899 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
901 data
->version_info_size
= 2 * num_ssa_names
;
902 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
903 data
->relevant
= BITMAP_ALLOC (NULL
);
904 data
->important_candidates
= BITMAP_ALLOC (NULL
);
905 data
->max_inv_id
= 0;
907 data
->iv_uses
.create (20);
908 data
->iv_candidates
.create (20);
909 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
910 data
->inv_expr_id
= 0;
911 data
->name_expansion_cache
= NULL
;
912 decl_rtl_to_reset
.create (20);
915 /* Returns a memory object to which EXPR points. In case we are able to
916 determine that it does not point to any such object, NULL is returned. */
919 determine_base_object (tree expr
)
921 enum tree_code code
= TREE_CODE (expr
);
924 /* If this is a pointer cast to any type, we need to determine
925 the base object for the pointer; so handle conversions before
926 throwing away non-pointer expressions. */
927 if (CONVERT_EXPR_P (expr
))
928 return determine_base_object (TREE_OPERAND (expr
, 0));
930 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
939 obj
= TREE_OPERAND (expr
, 0);
940 base
= get_base_address (obj
);
945 if (TREE_CODE (base
) == MEM_REF
)
946 return determine_base_object (TREE_OPERAND (base
, 0));
948 return fold_convert (ptr_type_node
,
949 build_fold_addr_expr (base
));
951 case POINTER_PLUS_EXPR
:
952 return determine_base_object (TREE_OPERAND (expr
, 0));
956 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
960 return fold_convert (ptr_type_node
, expr
);
964 /* Return true if address expression with non-DECL_P operand appears
968 contain_complex_addr_expr (tree expr
)
973 switch (TREE_CODE (expr
))
975 case POINTER_PLUS_EXPR
:
978 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
979 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
983 return (!DECL_P (TREE_OPERAND (expr
, 0)));
992 /* Allocates an induction variable with given initial value BASE and step STEP
996 alloc_iv (tree base
, tree step
)
999 struct iv
*iv
= XCNEW (struct iv
);
1000 gcc_assert (step
!= NULL_TREE
);
1002 /* Lower address expression in base except ones with DECL_P as operand.
1004 1) More accurate cost can be computed for address expressions;
1005 2) Duplicate candidates won't be created for bases in different
1006 forms, like &a[0] and &a. */
1008 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1009 || contain_complex_addr_expr (expr
))
1012 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1013 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1017 iv
->base_object
= determine_base_object (base
);
1020 iv
->have_use_for
= false;
1022 iv
->ssa_name
= NULL_TREE
;
1027 /* Sets STEP and BASE for induction variable IV.  Here IV is the ssa name
   (a tree) being described, not a struct iv.  The descriptor is created
   by alloc_iv and stored in the per-name version_info of DATA.  */
1030 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1032 struct version_info
*info
= name_info (data
, iv
);
/* A name may be given a descriptor only once.  */
1034 gcc_assert (!info
->iv
);
/* Mark the version of the name in DATA->relevant.  */
1036 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1037 info
->iv
= alloc_iv (base
, step
);
/* Link the new descriptor back to the name it describes.  */
1038 info
->iv
->ssa_name
= iv
;
1041 /* Finds induction variable declaration for VAR. */
1044 get_iv (struct ivopts_data
*data
, tree var
)
1047 tree type
= TREE_TYPE (var
);
1049 if (!POINTER_TYPE_P (type
)
1050 && !INTEGRAL_TYPE_P (type
))
1053 if (!name_info (data
, var
)->iv
)
1055 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1058 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1059 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1062 return name_info (data
, var
)->iv
;
1065 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1066 not define a simple affine biv with nonzero step. */
1069 determine_biv_step (gphi
*phi
)
1071 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1072 tree name
= PHI_RESULT (phi
);
1075 if (virtual_operand_p (name
))
1078 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1081 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1084 /* Return the first non-invariant ssa var found in EXPR. */
1087 extract_single_var_from_expr (tree expr
)
1091 enum tree_code code
;
1093 if (!expr
|| is_gimple_min_invariant (expr
))
1096 code
= TREE_CODE (expr
);
1097 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1099 n
= TREE_OPERAND_LENGTH (expr
);
1100 for (i
= 0; i
< n
; i
++)
1102 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1108 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1111 /* Finds basic ivs. */
1114 find_bivs (struct ivopts_data
*data
)
1117 tree step
, type
, base
, stop
;
1119 struct loop
*loop
= data
->current_loop
;
1122 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1126 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1129 step
= determine_biv_step (phi
);
1133 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1134 /* Stop expanding iv base at the first ssa var referred by iv step.
1135 Ideally we should stop at any ssa var, but because that's expensive
1136 and such cases are unusual, we just do it on the first one.
1138 See PR64705 for the rationale. */
1139 stop
= extract_single_var_from_expr (step
);
1140 base
= expand_simple_operations (base
, stop
);
1141 if (contains_abnormal_ssa_name_p (base
)
1142 || contains_abnormal_ssa_name_p (step
))
1145 type
= TREE_TYPE (PHI_RESULT (phi
));
1146 base
= fold_convert (type
, base
);
1149 if (POINTER_TYPE_P (type
))
1150 step
= convert_to_ptrofftype (step
);
1152 step
= fold_convert (type
, step
);
1155 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1162 /* Marks basic ivs. */
1165 mark_bivs (struct ivopts_data
*data
)
1170 struct iv
*iv
, *incr_iv
;
1171 struct loop
*loop
= data
->current_loop
;
1172 basic_block incr_bb
;
1175 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1179 iv
= get_iv (data
, PHI_RESULT (phi
));
1183 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1184 def
= SSA_NAME_DEF_STMT (var
);
1185 /* Don't mark iv peeled from other one as biv. */
1187 && gimple_code (def
) == GIMPLE_PHI
1188 && gimple_bb (def
) == loop
->header
)
1191 incr_iv
= get_iv (data
, var
);
1195 /* If the increment is in the subloop, ignore it. */
1196 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1197 if (incr_bb
->loop_father
!= data
->current_loop
1198 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1202 incr_iv
->biv_p
= true;
1206 /* Checks whether STMT defines a linear induction variable and stores its
1207 parameters to IV. */
1210 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1213 struct loop
*loop
= data
->current_loop
;
1215 iv
->base
= NULL_TREE
;
1216 iv
->step
= NULL_TREE
;
1218 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1221 lhs
= gimple_assign_lhs (stmt
);
1222 if (TREE_CODE (lhs
) != SSA_NAME
)
1225 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1228 /* Stop expanding iv base at the first ssa var referred by iv step.
1229 Ideally we should stop at any ssa var, but because that's expensive
1230 and such cases are unusual, we just do it on the first one.
1232 See PR64705 for the rationale. */
1233 stop
= extract_single_var_from_expr (iv
->step
);
1234 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1235 if (contains_abnormal_ssa_name_p (iv
->base
)
1236 || contains_abnormal_ssa_name_p (iv
->step
))
1239 /* If STMT could throw, then do not consider STMT as defining a GIV.
1240 While this will suppress optimizations, we can not safely delete this
1241 GIV and associated statements, even if it appears it is not used. */
1242 if (stmt_could_throw_p (stmt
))
1248 /* Finds general ivs in statement STMT.  find_givs_in_stmt_scev is asked
   to describe STMT and writes the result through the address of IV; the
   base and step it produced are then recorded with set_iv for the lhs of
   STMT.  */
1251 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
/* NOTE(review): the statement guarded by this test is not visible in this
   excerpt; presumably an early return when STMT does not define an iv --
   confirm.  */
1255 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1258 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1261 /* Finds general ivs in basic block BB.  Every statement of BB is handed
   to find_givs_in_stmt together with DATA.  */
1264 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1266 gimple_stmt_iterator bsi
;
/* Walk from gsi_start_bb (BB) until gsi_end_p, advancing with gsi_next.  */
1268 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1269 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1272 /* Finds general ivs. */
1275 find_givs (struct ivopts_data
*data
)
1277 struct loop
*loop
= data
->current_loop
;
1278 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1281 for (i
= 0; i
< loop
->num_nodes
; i
++)
1282 find_givs_in_bb (data
, body
[i
]);
1286 /* For each ssa name defined in LOOP determines whether it is an induction
1287 variable and if so, its initial value and step. */
1290 find_induction_variables (struct ivopts_data
*data
)
1295 if (!find_bivs (data
))
1301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1303 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1307 fprintf (dump_file
, " number of iterations ");
1308 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1309 if (!integer_zerop (niter
->may_be_zero
))
1311 fprintf (dump_file
, "; zero if ");
1312 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1314 fprintf (dump_file
, "\n\n");
1317 fprintf (dump_file
, "Induction variables:\n\n");
1319 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1321 if (ver_info (data
, i
)->iv
)
1322 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1329 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1331 static struct iv_use
*
1332 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1333 gimple stmt
, enum use_type use_type
)
1335 struct iv_use
*use
= XCNEW (struct iv_use
);
1337 use
->id
= n_iv_uses (data
);
1338 use
->type
= use_type
;
1342 use
->related_cands
= BITMAP_ALLOC (NULL
);
1344 /* To avoid showing ssa name in the dumps, if it was not reset by the
1346 iv
->ssa_name
= NULL_TREE
;
1348 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1349 dump_use (dump_file
, use
);
1351 data
->iv_uses
.safe_push (use
);
1356 /* Checks whether OP is a loop-level invariant and if so, records it.
1357 NONLINEAR_USE is true if the invariant is used in a way we do not
1358 handle specially. */
1361 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1364 struct version_info
*info
;
1366 if (TREE_CODE (op
) != SSA_NAME
1367 || virtual_operand_p (op
))
1370 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1372 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1375 info
= name_info (data
, op
);
1377 info
->has_nonlin_use
|= nonlinear_use
;
1379 info
->inv_id
= ++data
->max_inv_id
;
1380 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1383 /* Checks whether the use OP is interesting and if so, records it. */
1385 static struct iv_use
*
1386 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1393 if (TREE_CODE (op
) != SSA_NAME
)
1396 iv
= get_iv (data
, op
);
1400 if (iv
->have_use_for
)
1402 use
= iv_use (data
, iv
->use_id
);
1404 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1408 if (integer_zerop (iv
->step
))
1410 record_invariant (data
, op
, true);
1413 iv
->have_use_for
= true;
1415 civ
= XNEW (struct iv
);
1418 stmt
= SSA_NAME_DEF_STMT (op
);
1419 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1420 || is_gimple_assign (stmt
));
1422 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1423 iv
->use_id
= use
->id
;
1428 /* Given a condition in statement STMT, checks whether it is a compare
1429 of an induction variable and an invariant. If this is the case,
1430 CONTROL_VAR is set to location of the iv, BOUND to the location of
1431 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1432 induction variable descriptions, and true is returned. If this is not
1433 the case, CONTROL_VAR and BOUND are set to the arguments of the
1434 condition and false is returned. */
/* NOTE(review): several lines are missing from this copy, among them the
   declarations of `zero' and `ret', the stores to *bound, *iv_var and
   *iv_bound, and the final return.  */
1437 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1438 tree
**control_var
, tree
**bound
,
1439 struct iv
**iv_var
, struct iv
**iv_bound
)
1441 /* The objects returned when COND has constant operands. */
1442 static struct iv const_iv
;
/* Both operand pointers start at &zero and both iv pointers at &const_iv;
   they are replaced below when the operands are SSA names.  */
1444 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1445 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
/* For a GIMPLE_COND the operands are the lhs and rhs of the condition.  */
1448 if (gimple_code (stmt
) == GIMPLE_COND
)
1450 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1451 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1452 op1
= gimple_cond_rhs_ptr (cond_stmt
);
/* Here the operands are taken from rhs1 and rhs2 of an assignment.
   NOTE(review): presumably the else-branch of the test above; the `else'
   line itself is not visible.  */
1456 op0
= gimple_assign_rhs1_ptr (stmt
);
1457 op1
= gimple_assign_rhs2_ptr (stmt
);
/* Give the placeholders their values: a zero tree and an iv whose step is
   zero.  */
1460 zero
= integer_zero_node
;
1461 const_iv
.step
= integer_zero_node
;
1463 if (TREE_CODE (*op0
) == SSA_NAME
)
1464 iv0
= get_iv (data
, *op0
);
1465 if (TREE_CODE (*op1
) == SSA_NAME
)
1466 iv1
= get_iv (data
, *op1
);
1468 /* Exactly one of the compared values must be an iv, and the other one must
1473 if (integer_zerop (iv0
->step
))
1475 /* Control variable may be on the other side. */
1476 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1477 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
/* True only when iv0 has a nonzero step and iv1 has a zero step.  */
1479 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
/* The doubled semicolon below is an extra empty statement.  */
1483 *control_var
= op0
;;
/* find_interesting_uses_cond (DATA, STMT): calls extract_cond_operands on
   STMT, passing NULL for the iv_bound output.  When that call returns false,
   both condition operands are passed to find_interesting_uses_op.  The
   visible code also allocates a struct iv with XNEW and records a
   USE_COMPARE use for STMT with a NULL operand pointer.
   NOTE(review): the original header comment below is cut off in this copy
   (its closing delimiter is missing), and so are the braces, the statement
   that fills *civ and any early return.  */
1494 /* Checks whether the condition in STMT is interesting and if so,
1498 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1500 tree
*var_p
, *bound_p
;
1501 struct iv
*var_iv
, *civ
;
1503 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1505 find_interesting_uses_op (data
, *var_p
);
1506 find_interesting_uses_op (data
, *bound_p
);
1510 civ
= XNEW (struct iv
);
1512 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1515 /* Returns the outermost loop EXPR is obviously invariant in
1516 relative to the loop LOOP, i.e. if all its operands are defined
1517 outside of the returned loop. Returns NULL if EXPR is not
1518 even obviously invariant in LOOP. */
/* NOTE(review): the return type, braces, the declarations of def_bb, i and
   len, and the statements guarded by some of the tests below are missing
   from this copy.  */
1521 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
/* A gimple minimal invariant yields the root of the loop tree.  */
1526 if (is_gimple_min_invariant (expr
))
1527 return current_loops
->tree_root
;
/* SSA name: decide from the block that holds its defining statement.  */
1529 if (TREE_CODE (expr
) == SSA_NAME
)
1531 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1534 if (flow_bb_inside_loop_p (loop
, def_bb
))
/* Superloop of LOOP whose depth is one more than the depth of the loop
   containing the definition.  */
1536 return superloop_at_depth (loop
,
1537 loop_depth (def_bb
->loop_father
) + 1);
1540 return current_loops
->tree_root
;
/* General expression: recurse into each operand and keep the largest loop
   depth among the answers.  A null operand is tested for at 1551; the
   statement that test guards is not visible here.  */
1546 unsigned maxdepth
= 0;
1547 len
= TREE_OPERAND_LENGTH (expr
);
1548 for (i
= 0; i
< len
; i
++)
1550 struct loop
*ivloop
;
1551 if (!TREE_OPERAND (expr
, i
))
1554 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1557 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
/* The answer is the superloop of LOOP at the deepest depth collected.  */
1560 return superloop_at_depth (loop
, maxdepth
);
1563 /* Returns true if expression EXPR is obviously invariant in LOOP,
1564 i.e. if all its operands are defined outside of the LOOP. LOOP
1565 should not be the function body. */
/* NOTE(review): the return statements, braces and local declarations are
   missing from this copy; only the tests themselves are visible.  */
1568 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
/* Enforces the "not the function body" requirement: LOOP must have a depth
   greater than zero.  */
1573 gcc_assert (loop_depth (loop
) > 0);
1575 if (is_gimple_min_invariant (expr
))
/* SSA name: look at the block of its defining statement.  NOTE(review): the
   first half of the condition at 1582 is not visible.  */
1578 if (TREE_CODE (expr
) == SSA_NAME
)
1580 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1582 && flow_bb_inside_loop_p (loop
, def_bb
))
/* Any other expression: test every non-null operand recursively.  */
1591 len
= TREE_OPERAND_LENGTH (expr
);
1592 for (i
= 0; i
< len
; i
++)
1593 if (TREE_OPERAND (expr
, i
)
1594 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1600 /* Cumulates the steps of indices into DATA and replaces their values with the
1601 initial ones. Returns false when the value of the index cannot be determined.
1602 Callback for for_each_index. */
/* struct ifs_ivopts_data is the payload passed through the void pointer of
   the callback.  Besides ivopts_data, the code below also reads dta->stmt
   and dta->step; their declarations are not visible in this copy.  */
1604 struct ifs_ivopts_data
1606 struct ivopts_data
*ivopts_data
;
/* NOTE(review): the return type, braces, the declaration of `iv' and the
   return statements of idx_find_step are missing from this copy.  */
1612 idx_find_step (tree base
, tree
*idx
, void *data
)
1614 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1616 tree step
, iv_base
, iv_step
, lbound
, off
;
1617 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1619 /* If base is a component ref, require that the offset of the reference
1621 if (TREE_CODE (base
) == COMPONENT_REF
)
1623 off
= component_ref_field_offset (base
);
1624 return expr_invariant_in_loop_p (loop
, off
);
1627 /* If base is array, first check whether we will be able to move the
1628 reference out of the loop (in order to take its address in strength
1629 reduction). In order for this to work we need both lower bound
1630 and step to be loop invariants. */
1631 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1633 /* Moreover, for a range, the size needs to be invariant as well. */
1634 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1635 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
/* Element size and lower bound of the array reference; both are tested for
   loop invariance just below.  */
1638 step
= array_ref_element_size (base
);
1639 lbound
= array_ref_low_bound (base
);
1641 if (!expr_invariant_in_loop_p (loop
, step
)
1642 || !expr_invariant_in_loop_p (loop
, lbound
))
/* From here on the index itself is examined; it has to be an SSA name to
   have an iv looked up for it.  */
1646 if (TREE_CODE (*idx
) != SSA_NAME
)
1649 iv
= get_iv (dta
->ivopts_data
, *idx
);
1653 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1654 *&x[0], which is not folded and does not trigger the
1655 ARRAY_REF path below. */
1658 if (integer_zerop (iv
->step
))
/* Pick the per-index multiplier: the element size for array references, one
   byte otherwise.  */
1661 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1663 step
= array_ref_element_size (base
);
1665 /* We only handle addresses whose step is an integer constant. */
1666 if (TREE_CODE (step
) != INTEGER_CST
)
1670 /* The step for pointer arithmetics already is 1 byte. */
1671 step
= size_one_node
;
/* Convert the iv to sizetype.  NOTE(review): the remaining arguments of this
   call are not visible in this copy.  */
1675 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1676 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1679 /* The index might wrap. */
/* Scale the iv step by the multiplier and add it to the running total in
   dta->step, all in sizetype.  */
1683 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1684 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1689 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1690 object is passed to it in DATA. */
/* NOTE(review): the return type, the third parameter (used below as
   `vdata'), the braces and the return statement are missing from this
   copy.  */
1693 idx_record_use (tree base
, tree
*idx
,
1696 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
/* The index itself is always examined.  */
1697 find_interesting_uses_op (data
, *idx
);
/* For array references the element size and the lower bound are examined as
   well.  */
1698 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1700 find_interesting_uses_op (data
, array_ref_element_size (base
));
1701 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1706 /* If we can prove that TOP = cst * BOT for some constant cst,
1707 store cst to MUL and return true. Otherwise return false.
1708 The returned value is always sign-extended, regardless of the
1709 signedness of TOP and BOT. */
/* NOTE(review): the return type, the switch and case labels, the declaration
   of `mby', the return statements and the statements guarded by several
   tests are missing from this copy.  */
1712 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1715 enum tree_code code
;
/* Every result stored to *MUL is sign-extended from the precision of TOP's
   type.  */
1716 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1717 widest_int res
, p0
, p1
;
/* TOP and BOT are structurally equal.  */
1722 if (operand_equal_p (top
, bot
, 0))
1728 code
= TREE_CODE (top
);
/* TOP has an INTEGER_CST second operand: the multiple of operand 0 is found
   recursively and scaled by that constant.  NOTE(review): presumably the
   multiplication case; its label is not visible.  */
1732 mby
= TREE_OPERAND (top
, 1);
1733 if (TREE_CODE (mby
) != INTEGER_CST
)
1736 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1739 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
/* Both operands of TOP must themselves be constant multiples of BOT.  */
1744 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1745 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
/* NOTE(review): the statement guarded by this MINUS_EXPR test is not visible
   here; the visible code adds p0 and p1.  */
1748 if (code
== MINUS_EXPR
)
1750 *mul
= wi::sext (p0
+ p1
, precision
);
/* Constant case: BOT must be an INTEGER_CST.  Both values are read as signed
   and divided with truncation; the remainder lands in `res'.  */
1754 if (TREE_CODE (bot
) != INTEGER_CST
)
1757 p0
= widest_int::from (top
, SIGNED
);
1758 p1
= widest_int::from (bot
, SIGNED
);
1761 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1769 /* Return true if memory reference REF with step STEP may be unaligned. */
/* NOTE(review): the return type, braces and return statements are missing
   from this copy; only the tests are visible.  */
1772 may_be_unaligned_p (tree ref
, tree step
)
1774 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1775 thus they are not misaligned. */
1776 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
/* Required alignment: the larger of the alignment of REF's type and the
   alignment of that type's machine mode.  */
1779 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1780 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1781 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
/* get_object_alignment_1 fills ref_align and bitpos for REF; they are then
   compared against the required alignment and the byte size.  */
1783 unsigned HOST_WIDE_INT bitpos
;
1784 unsigned int ref_align
;
1785 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1786 if (ref_align
< align
1787 || (bitpos
% align
) != 0
1788 || (bitpos
% BITS_PER_UNIT
) != 0)
/* The step is checked through its count of trailing zero bits.  The
   comparison with HOST_BITS_PER_INT keeps the shift amount of 1U below the
   width of unsigned int.  */
1791 unsigned int trailing_zeros
= tree_ctz (step
);
1792 if (trailing_zeros
< HOST_BITS_PER_INT
1793 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1799 /* Return true if EXPR may be non-addressable. */
/* NOTE(review): several case labels, the return statements of some cases and
   the default handling are missing from this copy.  */
1802 may_be_nonaddressable_p (tree expr
)
1804 switch (TREE_CODE (expr
))
1806 case TARGET_MEM_REF
:
1807 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1808 target, thus they are always addressable. */
/* The result is true when operand 1 is marked DECL_NONADDRESSABLE_P or when
   operand 0 may itself be non-addressable.  NOTE(review): the case label
   for this return is not visible.  */
1812 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1813 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1815 case VIEW_CONVERT_EXPR
:
1816 /* This kind of view-conversions may wrap non-addressable objects
1817 and make them look addressable. After some processing the
1818 non-addressability may be uncovered again, causing ADDR_EXPRs
1819 of inappropriate objects to be built. */
1820 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1821 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1824 /* ... fall through ... */
/* Shared tail: the answer is that of the wrapped operand 0.  */
1827 case ARRAY_RANGE_REF
:
1828 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1840 /* Finds addresses in *OP_P inside STMT. */
/* Outline of the visible code: volatile statements and BIT_FIELD_REFs are
   tested for first.  The reference is then unshared.  For a TARGET_MEM_REF
   the SSA-name parts (TMR_BASE, TMR_INDEX2, TMR_INDEX) are replaced by the
   base of their ivs; otherwise for_each_index with idx_find_step gathers the
   step.  Finally an iv built from the address and the step is recorded as a
   USE_ADDRESS use.
   NOTE(review): many lines are missing from this copy (braces, gotos and
   labels, the declarations of civ and astep, several conditions).  The
   comment that starts at 1854 is also cut off, so no comments are added
   between it and 1920.  */
1843 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
/* BASE starts as the reference itself and STEP as zero.  */
1845 tree base
= *op_p
, step
= size_zero_node
;
1847 struct ifs_ivopts_data ifs_ivopts_data
;
1849 /* Do not play with volatile memory references. A bit too conservative,
1850 perhaps, but safe. */
1851 if (gimple_has_volatile_ops (stmt
))
1854 /* Ignore bitfields for now. Not really something terribly complicated
1856 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1859 base
= unshare_expr (base
);
1861 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1863 tree type
= build_pointer_type (TREE_TYPE (base
));
1867 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1869 civ
= get_iv (data
, TMR_BASE (base
));
1873 TMR_BASE (base
) = civ
->base
;
1876 if (TMR_INDEX2 (base
)
1877 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1879 civ
= get_iv (data
, TMR_INDEX2 (base
));
1883 TMR_INDEX2 (base
) = civ
->base
;
1886 if (TMR_INDEX (base
)
1887 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1889 civ
= get_iv (data
, TMR_INDEX (base
));
1893 TMR_INDEX (base
) = civ
->base
;
1898 if (TMR_STEP (base
))
1899 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1901 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1905 if (integer_zerop (step
))
1907 base
= tree_mem_ref_addr (type
, base
);
1911 ifs_ivopts_data
.ivopts_data
= data
;
1912 ifs_ivopts_data
.stmt
= stmt
;
1913 ifs_ivopts_data
.step
= size_zero_node
;
1914 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1915 || integer_zerop (ifs_ivopts_data
.step
))
1917 step
= ifs_ivopts_data
.step
;
1919 /* Check that the base expression is addressable. This needs
1920 to be done after substituting bases of IVs into it. */
1921 if (may_be_nonaddressable_p (base
))
1924 /* Moreover, on strict alignment platforms, check that it is
1925 sufficiently aligned. */
1926 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
/* From here on BASE is the address of the reference, not the reference.  */
1929 base
= build_fold_addr_expr (base
);
1931 /* Substituting bases of IVs into the base expression might
1932 have caused folding opportunities. */
1933 if (TREE_CODE (base
) == ADDR_EXPR
)
/* Walk down through the handled components to the innermost reference and,
   if it is a MEM_REF, try to fold it.  NOTE(review): what is done with `tem'
   is not visible in this copy.  */
1935 tree
*ref
= &TREE_OPERAND (base
, 0);
1936 while (handled_component_p (*ref
))
1937 ref
= &TREE_OPERAND (*ref
, 0);
1938 if (TREE_CODE (*ref
) == MEM_REF
)
1940 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1941 TREE_OPERAND (*ref
, 0),
1942 TREE_OPERAND (*ref
, 1));
/* Build the iv for the address and record the USE_ADDRESS use of *OP_P.  */
1949 civ
= alloc_iv (base
, step
);
1950 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
/* Each index of *OP_P is recorded through idx_record_use.  NOTE(review):
   presumably this is the fallback path reached when the address cannot be
   handled; the label and gotos are not visible here.  */
1954 for_each_index (op_p
, idx_record_use
, data
);
1957 /* Finds and records invariants used in STMT. */
/* NOTE(review): the return type, braces and the declarations of `iter' and
   `op' are missing from this copy.  */
1960 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1963 use_operand_p use_p
;
/* Visit every SSA_OP_USE operand of the statement or PHI node and pass it to
   record_invariant with nonlinear_use = false.  */
1966 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1968 op
= USE_FROM_PTR (use_p
);
1969 record_invariant (data
, op
, false);
1973 /* Finds interesting uses of induction variables in the statement STMT. */
/* Outline of the visible code: invariants of STMT are recorded first.  A
   GIMPLE_COND goes to find_interesting_uses_cond.  For an assignment, a
   single-rhs form that is a memory reference or a gimple value is examined
   through find_interesting_uses_address and find_interesting_uses_op, and a
   comparison rhs code goes to find_interesting_uses_cond.  After that a PHI
   in the loop header is examined, and finally each SSA_OP_USE operand.
   NOTE(review): braces, early returns and the declarations of iv and iter
   are missing from this copy.  The comment that starts at 2028 is cut off,
   so no comments are added after it.  */
1976 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1979 tree op
, *lhs
, *rhs
;
1981 use_operand_p use_p
;
1982 enum tree_code code
;
1984 find_invariants_stmt (data
, stmt
);
1986 if (gimple_code (stmt
) == GIMPLE_COND
)
1988 find_interesting_uses_cond (data
, stmt
);
1992 if (is_gimple_assign (stmt
))
1994 lhs
= gimple_assign_lhs_ptr (stmt
);
1995 rhs
= gimple_assign_rhs1_ptr (stmt
);
1997 if (TREE_CODE (*lhs
) == SSA_NAME
)
1999 /* If the statement defines an induction variable, the uses are not
2000 interesting by themselves. */
2002 iv
= get_iv (data
, *lhs
);
2004 if (iv
&& !integer_zerop (iv
->step
))
2008 code
= gimple_assign_rhs_code (stmt
);
/* Single-operand right-hand side that is either a memory reference or a
   gimple value.  */
2009 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2010 && (REFERENCE_CLASS_P (*rhs
)
2011 || is_gimple_val (*rhs
)))
2013 if (REFERENCE_CLASS_P (*rhs
))
2014 find_interesting_uses_address (data
, stmt
, rhs
);
2016 find_interesting_uses_op (data
, *rhs
);
/* A memory reference on the left-hand side is an address use too.  */
2018 if (REFERENCE_CLASS_P (*lhs
))
2019 find_interesting_uses_address (data
, stmt
, lhs
);
/* An assignment whose rhs code is a comparison is handled like a
   condition.  */
2022 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2024 find_interesting_uses_cond (data
, stmt
);
2028 /* TODO -- we should also handle address uses of type
2030 memory = call (whatever);
2037 if (gimple_code (stmt
) == GIMPLE_PHI
2038 && gimple_bb (stmt
) == data
->current_loop
->header
)
2040 iv
= get_iv (data
, PHI_RESULT (stmt
));
2042 if (iv
&& !integer_zerop (iv
->step
))
2046 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2048 op
= USE_FROM_PTR (use_p
);
2050 if (TREE_CODE (op
) != SSA_NAME
)
2053 iv
= get_iv (data
, op
);
2057 find_interesting_uses_op (data
, op
);
2061 /* Finds interesting uses of induction variables outside of loops
2062 on loop exit edge EXIT. */
/* NOTE(review): the return type, braces, the declarations of psi, phi and
   def, and the assignment of `phi' inside the loop are missing from this
   copy.  */
2065 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
/* Walk the PHI nodes of the exit edge's destination block.  */
2071 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
/* Take the PHI argument that arrives over EXIT; virtual operands are left
   out, everything else goes to find_interesting_uses_op.  */
2074 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2075 if (!virtual_operand_p (def
))
2076 find_interesting_uses_op (data
, def
);
2080 /* Finds uses of the induction variables that are interesting. */
/* NOTE(review): the return type, braces, several declarations (i, bb, e, ei,
   bi) and the assignment of `bb' in the loop are missing from this copy.  */
2083 find_interesting_uses (struct ivopts_data
*data
)
2086 gimple_stmt_iterator bsi
;
/* Array of the basic blocks of the current loop.  NOTE(review): the
   statement that releases `body' is not visible in this copy -- confirm it
   exists in the full source.  */
2087 basic_block
*body
= get_loop_body (data
->current_loop
);
2089 struct version_info
*info
;
2092 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2093 fprintf (dump_file
, "Uses:\n\n");
/* One iteration per block of the loop.  */
2095 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
/* Successor edges that leave the loop, other than edges to the function's
   exit block, are handed to find_interesting_uses_outside.  */
2100 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2101 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2102 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2103 find_interesting_uses_outside (data
, e
);
/* First the PHI nodes of the block, then its statements; debug statements
   are left out.  */
2105 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2106 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2107 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2108 if (!is_gimple_debug (gsi_stmt (bsi
)))
2109 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
/* Detailed dump: list the names set in data->relevant.  A name is printed
   with ", eliminable" only when it has no nonlinear use.  NOTE(review): any
   filter applied to `info' before printing is not visible here.  */
2112 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2116 fprintf (dump_file
, "\n");
2118 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2120 info
= ver_info (data
, i
);
2123 fprintf (dump_file
, " ");
2124 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2125 fprintf (dump_file
, " is invariant (%d)%s\n",
2126 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2130 fprintf (dump_file
, "\n");
2136 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2137 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2138 we are at the top-level of the processed address. */
/* NOTE(review): this copy is missing the return type, the switch statement
   with most of its case labels, the braces, the declaration of `field' and
   most early returns.  The comments added below describe visible statements
   only.  The last original comment (from 2301) is cut off, so nothing is
   added after it.  */
2141 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2142 HOST_WIDE_INT
*offset
)
2144 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2145 enum tree_code code
;
/* ORIG_TYPE and ORIG_EXPR keep the type and the tree of EXPR as passed in;
   results are converted back to ORIG_TYPE.  */
2146 tree type
, orig_type
= TREE_TYPE (expr
);
2147 HOST_WIDE_INT off0
, off1
, st
;
2148 tree orig_expr
= expr
;
2152 type
= TREE_TYPE (expr
);
2153 code
= TREE_CODE (expr
);
/* Constant: if it fits a HOST_WIDE_INT and is nonzero, the whole value
   becomes the offset and a zero of ORIG_TYPE is returned.  */
2159 if (!cst_and_fits_in_hwi (expr
)
2160 || integer_zerop (expr
))
2163 *offset
= int_cst_value (expr
);
2164 return build_int_cst (orig_type
, 0);
/* Sum or difference: strip both operands, combine their offsets (subtracted
   for MINUS_EXPR, added otherwise) and rebuild the expression from what is
   left.  */
2166 case POINTER_PLUS_EXPR
:
2169 op0
= TREE_OPERAND (expr
, 0);
2170 op1
= TREE_OPERAND (expr
, 1);
2172 op0
= strip_offset_1 (op0
, false, false, &off0
);
2173 op1
= strip_offset_1 (op1
, false, false, &off1
);
2175 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2176 if (op0
== TREE_OPERAND (expr
, 0)
2177 && op1
== TREE_OPERAND (expr
, 1))
2180 if (integer_zerop (op1
))
2182 else if (integer_zerop (op0
))
2184 if (code
== MINUS_EXPR
)
2185 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2190 expr
= fold_build2 (code
, type
, op0
, op1
);
2192 return fold_convert (orig_type
, expr
);
/* Product with a constant second operand: the offset stripped from operand 0
   is multiplied by that constant.  NOTE(review): presumably the MULT_EXPR
   case; the label is not visible.  */
2195 op1
= TREE_OPERAND (expr
, 1);
2196 if (!cst_and_fits_in_hwi (op1
))
2199 op0
= TREE_OPERAND (expr
, 0);
2200 op0
= strip_offset_1 (op0
, false, false, &off0
);
2201 if (op0
== TREE_OPERAND (expr
, 0))
2204 *offset
= off0
* int_cst_value (op1
);
2205 if (integer_zerop (op0
))
2208 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2210 return fold_convert (orig_type
, expr
);
/* Array reference: the offset stripped from the index is multiplied by the
   element size, which must be a constant that fits a HOST_WIDE_INT.  */
2213 case ARRAY_RANGE_REF
:
2217 step
= array_ref_element_size (expr
);
2218 if (!cst_and_fits_in_hwi (step
))
2221 st
= int_cst_value (step
);
2222 op1
= TREE_OPERAND (expr
, 1);
2223 op1
= strip_offset_1 (op1
, false, false, &off1
);
2224 *offset
= off1
* st
;
2227 && integer_zerop (op1
))
2229 /* Strip the component reference completely. */
2230 op0
= TREE_OPERAND (expr
, 0);
2231 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
/* Component reference: uses the field's byte offset (tmp) and its
   DECL_FIELD_BIT_OFFSET, both of which must be constants that fit a
   HOST_WIDE_INT.  */
2244 tmp
= component_ref_field_offset (expr
);
2245 field
= TREE_OPERAND (expr
, 1);
2247 && cst_and_fits_in_hwi (tmp
)
2248 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2250 HOST_WIDE_INT boffset
, abs_off
;
2252 /* Strip the component reference completely. */
2253 op0
= TREE_OPERAND (expr
, 0);
2254 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2255 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
/* Absolute bit offset converted to whole bytes.  NOTE(review): any sign
   adjustment of abs_off before it is used at 2260 is not visible in this
   copy.  */
2256 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2260 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
/* Address-of: the operand is stripped with inside_addr and top_compref both
   true, and the address is rebuilt if the operand changed.  */
2267 op0
= TREE_OPERAND (expr
, 0);
2268 op0
= strip_offset_1 (op0
, true, true, &off0
);
2271 if (op0
== TREE_OPERAND (expr
, 0))
2274 expr
= build_fold_addr_expr (op0
);
2275 return fold_convert (orig_type
, expr
);
2278 /* ??? Offset operand? */
2279 inside_addr
= false;
2286 /* Default handling of expressions for that we want to recurse into
2287 the first operand. */
2288 op0
= TREE_OPERAND (expr
, 0);
2289 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
/* If nothing changed, the test below is true.  Otherwise EXPR is copied
   before its operands are replaced, so the tree passed in is not
   modified.  */
2292 if (op0
== TREE_OPERAND (expr
, 0)
2293 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2296 expr
= copy_node (expr
);
2297 TREE_OPERAND (expr
, 0) = op0
;
2299 TREE_OPERAND (expr
, 1) = op1
;
2301 /* Inside address, we might strip the top level component references,
2302 thus changing type of the expression. Handling of ADDR_EXPR
2304 expr
= fold_convert (orig_type
, expr
);
2309 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* Wrapper around strip_offset_1, called with inside_addr and top_compref
   both false.  NOTE(review): the return type, the declaration of `off', the
   store to *OFFSET and the return statement are missing from this copy.
   OFFSET is unsigned here while strip_offset_1 takes a signed
   HOST_WIDE_INT.  */
2312 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2315 tree core
= strip_offset_1 (expr
, false, false, &off
);
/* generic_type_for (TYPE): a pointer TYPE yields unsigned_type_for (TYPE).
   Other types are tested with TYPE_UNSIGNED, and the last visible statement
   again returns unsigned_type_for (TYPE).
   NOTE(review): the original header comment below is cut off in this copy
   (its closing delimiter is missing), and so are the return type, the braces
   and the statement guarded by the TYPE_UNSIGNED test.  */
2320 /* Returns variant of TYPE that can be used as base for different uses.
2321 We return unsigned type with the same precision, which avoids problems
2325 generic_type_for (tree type
)
2327 if (POINTER_TYPE_P (type
))
2328 return unsigned_type_for (type
);
2330 if (TYPE_UNSIGNED (type
))
2333 return unsigned_type_for (type
);
2336 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2337 the bitmap to that we should store it. */
/* fd_ivopts_data is a file-scope pointer read by find_depends.  The only
   visible writer is add_candidate_1, which sets it just before calling
   walk_tree.  */
2339 static struct ivopts_data
*fd_ivopts_data
;
/* NOTE(review): the return type, braces, return statements and the condition
   in front of the BITMAP_ALLOC at 2354 are missing from this copy.  */
2341 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
/* DATA is really a pointer to a bitmap.  */
2343 bitmap
*depends_on
= (bitmap
*) data
;
2344 struct version_info
*info
;
/* Only SSA names are of interest.  */
2346 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2348 info
= name_info (fd_ivopts_data
, *expr_p
);
/* Names with no invariant id, or with a nonlinear use, are tested for
   here.  */
2350 if (!info
->inv_id
|| info
->has_nonlin_use
)
/* Record the invariant id in the bitmap.  NOTE(review): presumably the
   bitmap is allocated only when it does not exist yet; the guarding line is
   not visible.  */
2354 *depends_on
= BITMAP_ALLOC (NULL
);
2355 bitmap_set_bit (*depends_on
, info
->inv_id
);
2360 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2361 position to POS. If USE is not NULL, the candidate is set as related to
2362 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2363 replacement of the final value of the iv by a direct computation. */
/* Declared to return a pointer to struct iv_cand.  NOTE(review): this copy
   is missing the declaration of `i', braces, `continue'/`break' lines, the
   assignments of cand->id and cand->pos, some conditions and the return
   statement.  */
2365 static struct iv_cand
*
2366 add_candidate_1 (struct ivopts_data
*data
,
2367 tree base
, tree step
, bool important
, enum iv_position pos
,
2368 struct iv_use
*use
, gimple incremented_at
)
2371 struct iv_cand
*cand
= NULL
;
2372 tree type
, orig_type
;
2374 /* For non-original variables, make sure their values are computed in a type
2375 that does not invoke undefined behavior on overflows (since in general,
2376 we cannot prove that these induction variables are non-wrapping). */
2377 if (pos
!= IP_ORIGINAL
)
2379 orig_type
= TREE_TYPE (base
);
2380 type
= generic_type_for (orig_type
);
2381 if (type
!= orig_type
)
2383 base
= fold_convert (type
, base
);
2384 step
= fold_convert (type
, step
);
/* Search the existing candidates.  The visible tests compare position,
   increment statement, the auto-increment use (for IP_AFTER_USE and
   IP_BEFORE_USE only) and finally base, step and type precision.  */
2388 for (i
= 0; i
< n_iv_cands (data
); i
++)
2390 cand
= iv_cand (data
, i
);
2392 if (cand
->pos
!= pos
)
2395 if (cand
->incremented_at
!= incremented_at
2396 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2397 && cand
->ainc_use
!= use
))
2411 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2412 && operand_equal_p (step
, cand
->iv
->step
, 0)
2413 && (TYPE_PRECISION (TREE_TYPE (base
))
2414 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
/* The loop ran to its end, so no existing candidate was selected: create a
   zero-initialised one (XCNEW).  */
2418 if (i
== n_iv_cands (data
))
2420 cand
= XCNEW (struct iv_cand
);
2426 cand
->iv
= alloc_iv (base
, step
);
/* Non-original candidates get a new temporary named "ivtmp", used for both
   the value before and the value after the increment.  */
2429 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2431 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2432 cand
->var_after
= cand
->var_before
;
2434 cand
->important
= important
;
2435 cand
->incremented_at
= incremented_at
;
2436 data
->iv_candidates
.safe_push (cand
);
/* For a step that is not an INTEGER_CST, walk the step and let find_depends
   collect invariant ids into cand->depends_on.  find_depends receives the
   ivopts data through the file-scope fd_ivopts_data.  NOTE(review): the
   first half of the condition is not visible.  */
2439 && TREE_CODE (step
) != INTEGER_CST
)
2441 fd_ivopts_data
= data
;
2442 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
/* ainc_use is USE for the two auto-increment positions and NULL
   otherwise.  NOTE(review): the `else' line is not visible.  */
2445 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2446 cand
->ainc_use
= use
;
2448 cand
->ainc_use
= NULL
;
2450 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2451 dump_cand (dump_file
, cand
);
/* A candidate can be upgraded to important; it is never downgraded here.  */
2454 if (important
&& !cand
->important
)
2456 cand
->important
= true;
2457 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2458 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
/* Relate the candidate (by index i) to USE.  NOTE(review): the `if (use)'
   guard and the arguments of the last fprintf are not visible in this
   copy.  */
2463 bitmap_set_bit (use
->related_cands
, i
);
2464 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2465 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2472 /* Returns true if incrementing the induction variable at the end of the LOOP
2475 The purpose is to avoid splitting latch edge with a biv increment, thus
2476 creating a jump, possibly confusing other optimization passes and leaving
2477 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2478 is not available (so we do not have a better alternative), or if the latch
2479 edge is already nonempty. */
/* NOTE(review): the return type, braces and return statements are missing
   from this copy.  The two visible tests match the two conditions named in
   the comment above: no normal position, and a non-empty block at the end
   position.  */
2482 allow_ip_end_pos_p (struct loop
*loop
)
2484 if (!ip_normal_pos (loop
))
2487 if (!empty_block_p (ip_end_pos (loop
)))
2493 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2494 Important field is set to IMPORTANT. */
/* Outline of the visible code: the use must sit directly in the current
   loop, in a block that dominates the latch, its statement must not be able
   to throw, and STEP must be a constant that fits a HOST_WIDE_INT.  If the
   target's pre-increment or pre-decrement macros accept the memory mode and
   the mode size equals cstepi (or -cstepi for decrement), an IP_BEFORE_USE
   candidate is added with a base adjusted by the step.  The same test with
   the post-increment and post-decrement macros adds an IP_AFTER_USE
   candidate with the unchanged base.
   NOTE(review): the return type, braces, the early return, the declaration
   of new_base and the last argument of both add_candidate_1 calls are
   missing from this copy.  The comment that starts at 2504 is cut off, so no
   comments are added after it.  */
2497 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2498 bool important
, struct iv_use
*use
)
2500 basic_block use_bb
= gimple_bb (use
->stmt
);
2501 machine_mode mem_mode
;
/* cstepi is unsigned, so the -cstepi comparisons further down use the
   wrapped-around negation.  */
2502 unsigned HOST_WIDE_INT cstepi
;
2504 /* If we insert the increment in any position other than the standard
2505 ones, we must ensure that it is incremented once per iteration.
2506 It must not be in an inner nested loop, or one side of an if
2508 if (use_bb
->loop_father
!= data
->current_loop
2509 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2510 || stmt_could_throw_p (use
->stmt
)
2511 || !cst_and_fits_in_hwi (step
))
2514 cstepi
= int_cst_value (step
);
2516 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2517 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2518 || USE_STORE_PRE_INCREMENT (mem_mode
))
2519 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2520 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2521 || USE_STORE_PRE_DECREMENT (mem_mode
))
2522 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2524 enum tree_code code
= MINUS_EXPR
;
2526 tree new_step
= step
;
2528 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2530 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2531 code
= POINTER_PLUS_EXPR
;
2534 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2535 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2536 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2539 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2540 || USE_STORE_POST_INCREMENT (mem_mode
))
2541 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2542 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2543 || USE_STORE_POST_DECREMENT (mem_mode
))
2544 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2546 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2551 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2552 position to POS. If USE is not NULL, the candidate is set as related to
2553 it. The candidate computation is scheduled on all available positions. */
/* NOTE(review): this function takes no POS parameter, although the comment
   above mentions one; the positions are chosen below.  The return type and
   braces are missing from this copy.  */
2556 add_candidate (struct ivopts_data
*data
,
2557 tree base
, tree step
, bool important
, struct iv_use
*use
)
/* Normal position, when the loop has one.  */
2559 if (ip_normal_pos (data
->current_loop
))
2560 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
/* End position, when the loop has one and allow_ip_end_pos_p accepts it.  */
2561 if (ip_end_pos (data
->current_loop
)
2562 && allow_ip_end_pos_p (data
->current_loop
))
2563 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
/* Auto-increment variants are tried only for address uses.  */
2565 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2566 add_autoinc_candidates (data
, base
, step
, important
, use
);
2569 /* Adds standard iv candidates. */
/* NOTE(review): the return type, braces and the `if (TYPE_PRECISION' openers
   of both conditions are missing from this copy.  */
2572 add_standard_iv_candidates (struct ivopts_data
*data
)
/* Important candidate with base 0 and step 1, built from integer_zero_node
   and integer_one_node.  */
2574 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2576 /* The same for a double-integer type if it is still fast enough. */
/* Here the type is long_integer_type_node: it must be wider than
   integer_type_node and no wider than BITS_PER_WORD.  */
2578 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2579 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2580 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2581 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2583 /* The same for a double-integer type if it is still fast enough. */
/* Here the type is long_long_integer_type_node: it must be wider than
   long_integer_type_node and no wider than BITS_PER_WORD.  */
2585 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2586 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2587 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2588 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2592 /* Adds candidates bases on the old induction variable IV. */
/* NOTE(review): the return type, braces, the declarations of phi and def and
   the `else' between the two zero-base calls are missing from this copy.
   The comment that starts at 2613 is cut off, so no comments are added
   between it and 2617.  */
2595 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2599 struct iv_cand
*cand
;
/* The iv itself, as an important candidate.  */
2601 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2603 /* The same, but with initial value zero. */
/* For pointer bases the zero comes from size_int (0); otherwise it is built
   in the type of the base.  */
2604 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2605 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2607 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2608 iv
->step
, true, NULL
);
/* Defining statement of the iv's SSA name; the rest applies only when it is
   a PHI node.  */
2610 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2611 if (gimple_code (phi
) == GIMPLE_PHI
)
2613 /* Additionally record the possibility of leaving the original iv
2615 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2616 /* Don't add candidate if it's from another PHI node because
2617 it's an affine iv appearing in the form of PEELED_CHREC. */
2618 phi
= SSA_NAME_DEF_STMT (def
);
2619 if (gimple_code (phi
) != GIMPLE_PHI
)
/* IP_ORIGINAL candidate incremented at the statement that defines DEF.  Its
   variables are the existing SSA names: the iv's name before the increment
   and DEF after it.  */
2621 cand
= add_candidate_1 (data
,
2622 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2623 SSA_NAME_DEF_STMT (def
));
2624 cand
->var_before
= iv
->ssa_name
;
2625 cand
->var_after
= def
;
/* At this point `phi' refers to the definition of DEF, which must be in the
   loop header.  NOTE(review): presumably this is the else-branch of the test
   at 2619; the `else' line is not visible.  */
2628 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2632 /* Adds candidates based on the old induction variables. */
/* NOTE(review): the return type, braces and the declarations of i, iv and bi
   are missing from this copy.  */
2635 add_old_ivs_candidates (struct ivopts_data
*data
)
/* Visit every SSA version set in data->relevant.  Candidates are added only
   for ivs that exist, are marked biv_p and have a nonzero step.  */
2641 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2643 iv
= ver_info (data
, i
)->iv
;
2644 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2645 add_old_iv_candidates (data
, iv
);
2649 /* Adds candidates based on the value of the induction variable IV and USE. */
/* NOTE(review): the return type, braces, the declarations of base and
   basetype and the first half of the condition at 2674 are missing from this
   copy.  The comment that starts at 2661 is cut off, so no comments are
   added between it and 2671.  */
2652 add_iv_value_candidates (struct ivopts_data
*data
,
2653 struct iv
*iv
, struct iv_use
*use
)
2655 unsigned HOST_WIDE_INT offset
;
/* First, the iv exactly as it is; not marked important.  */
2659 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2661 /* The same, but with initial value zero. Make such variable important,
2662 since it is generic enough so that possibly many uses may be based
2664 basetype
= TREE_TYPE (iv
->base
);
2665 if (POINTER_TYPE_P (basetype
))
2666 basetype
= sizetype
;
2667 add_candidate (data
, build_int_cst (basetype
, 0),
2668 iv
->step
, true, use
);
2670 /* Third, try removing the constant offset. Make sure to even
2671 add a candidate for &a[0] vs. (T *)&a. */
/* strip_offset returns the base without its constant part and stores that
   constant in `offset'.  */
2672 base
= strip_offset (iv
->base
, &offset
);
2674 || base
!= iv
->base
)
2675 add_candidate (data
, base
, iv
->step
, false, use
);
2678 /* Adds candidates based on the uses. */
/* NOTE(review): the return type, braces, the declaration of `i', the switch
   on the use type and all but one case label are missing from this copy.  */
2681 add_derived_ivs_candidates (struct ivopts_data
*data
)
/* One pass over all recorded uses.  */
2685 for (i
= 0; i
< n_iv_uses (data
); i
++)
2687 struct iv_use
*use
= iv_use (data
, i
);
2694 case USE_NONLINEAR_EXPR
:
2697 /* Just add the ivs based on the value of the iv used here. */
2698 add_iv_value_candidates (data
, use
->iv
, use
);
/* record_important_candidates (DATA): sets the bit of every important
   candidate in data->important_candidates, then sets
   data->consider_all_candidates to whether the number of candidates is at
   most CONSIDER_ALL_CANDIDATES_BOUND.  When all candidates are considered
   the related_cands bitmap of every use is freed.  The last loop ORs the
   important candidates into every use's related_cands.
   NOTE(review): the original header comment below is cut off in this copy
   (its closing delimiter is missing), and so are the return type, braces,
   the declarations of i and use, and the `else' in front of the last
   loop.  */
2707 /* Record important candidates and add them to related_cands bitmaps
2711 record_important_candidates (struct ivopts_data
*data
)
2716 for (i
= 0; i
< n_iv_cands (data
); i
++)
2718 struct iv_cand
*cand
= iv_cand (data
, i
);
2720 if (cand
->important
)
2721 bitmap_set_bit (data
->important_candidates
, i
);
2724 data
->consider_all_candidates
= (n_iv_cands (data
)
2725 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2727 if (data
->consider_all_candidates
)
2729 /* We will not need "related_cands" bitmaps in this case,
2730 so release them to decrease peak memory consumption. */
/* Free the bitmap of every use.  */
2731 for (i
= 0; i
< n_iv_uses (data
); i
++)
2733 use
= iv_use (data
, i
);
2734 BITMAP_FREE (use
->related_cands
);
2739 /* Add important candidates to the related_cands bitmaps. */
2740 for (i
= 0; i
< n_iv_uses (data
); i
++)
2741 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2742 data
->important_candidates
);
2746 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2747 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2748 we allocate a simple list to every use. */
/* NOTE(review): the return type, braces and the `else' between the two size
   computations are missing from this copy.  */
2751 alloc_use_cost_map (struct ivopts_data
*data
)
2753 unsigned i
, size
, s
;
2755 for (i
= 0; i
< n_iv_uses (data
); i
++)
2757 struct iv_use
*use
= iv_use (data
, i
);
/* With all candidates considered, the map has one slot per candidate.  */
2759 if (data
->consider_all_candidates
)
2760 size
= n_iv_cands (data
);
/* Otherwise the size is derived from the number of candidates related to
   this use; a use with none still gets one slot.  */
2763 s
= bitmap_count_bits (use
->related_cands
);
2765 /* Round up to the power of two, so that moduling by it is fast. */
2766 size
= s
? (1 << ceil_log2 (s
)) : 1;
/* XCNEWVEC zero-fills the array, so every slot starts with a null `cand'.
   set_use_iv_cost tests that field to find a free slot.  */
2769 use
->n_map_members
= size
;
2770 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2774 /* Returns description of computation cost of expression whose runtime
2775 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
/* NOTE(review): the return type, braces, the declaration of `cost' and the
   return statement are missing from this copy.  The two visible statements
   copy the arguments into the .cost and .complexity fields.  */
2778 new_cost (unsigned runtime
, unsigned complexity
)
2782 cost
.cost
= runtime
;
2783 cost
.complexity
= complexity
;
2788 /* Adds costs COST1 and COST2. */
/* Both fields are summed into COST1.  COST1 is a by-value parameter, so the
   caller's object is not modified.  NOTE(review): the return type, braces
   and return statement are missing from this copy.  */
2791 add_costs (comp_cost cost1
, comp_cost cost2
)
2793 cost1
.cost
+= cost2
.cost
;
2794 cost1
.complexity
+= cost2
.complexity
;
2798 /* Subtracts costs COST1 and COST2. */
/* COST2 is subtracted from COST1, field by field.  COST1 is a by-value
   parameter, so the caller's object is not modified.  NOTE(review): the
   return type, braces and return statement are missing from this copy.  */
2801 sub_costs (comp_cost cost1
, comp_cost cost2
)
2803 cost1
.cost
-= cost2
.cost
;
2804 cost1
.complexity
-= cost2
.complexity
;
2809 /* Returns a negative number if COST1 < COST2, a positive number if
2810 COST1 > COST2, and 0 if COST1 = COST2. */
2813 compare_costs (comp_cost cost1
, comp_cost cost2
)
2815 if (cost1
.cost
== cost2
.cost
)
2816 return cost1
.complexity
- cost2
.complexity
;
2818 return cost1
.cost
- cost2
.cost
;
2821 /* Returns true if COST is infinite. */
2824 infinite_cost_p (comp_cost cost
)
2826 return cost
.cost
== INFTY
;
2829 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2830 on invariants DEPENDS_ON and that the value used in expressing it
2831 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2834 set_use_iv_cost (struct ivopts_data
*data
,
2835 struct iv_use
*use
, struct iv_cand
*cand
,
2836 comp_cost cost
, bitmap depends_on
, tree value
,
2837 enum tree_code comp
, int inv_expr_id
)
2841 if (infinite_cost_p (cost
))
2843 BITMAP_FREE (depends_on
);
2847 if (data
->consider_all_candidates
)
2849 use
->cost_map
[cand
->id
].cand
= cand
;
2850 use
->cost_map
[cand
->id
].cost
= cost
;
2851 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2852 use
->cost_map
[cand
->id
].value
= value
;
2853 use
->cost_map
[cand
->id
].comp
= comp
;
2854 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s
= cand
->id
& (use
->n_map_members
- 1);
2860 for (i
= s
; i
< use
->n_map_members
; i
++)
2861 if (!use
->cost_map
[i
].cand
)
2863 for (i
= 0; i
< s
; i
++)
2864 if (!use
->cost_map
[i
].cand
)
2870 use
->cost_map
[i
].cand
= cand
;
2871 use
->cost_map
[i
].cost
= cost
;
2872 use
->cost_map
[i
].depends_on
= depends_on
;
2873 use
->cost_map
[i
].value
= value
;
2874 use
->cost_map
[i
].comp
= comp
;
2875 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2878 /* Gets cost of (USE, CANDIDATE) pair. */
2880 static struct cost_pair
*
2881 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2882 struct iv_cand
*cand
)
2885 struct cost_pair
*ret
;
2890 if (data
->consider_all_candidates
)
2892 ret
= use
->cost_map
+ cand
->id
;
2899 /* n_map_members is a power of two, so this computes modulo. */
2900 s
= cand
->id
& (use
->n_map_members
- 1);
2901 for (i
= s
; i
< use
->n_map_members
; i
++)
2902 if (use
->cost_map
[i
].cand
== cand
)
2903 return use
->cost_map
+ i
;
2904 else if (use
->cost_map
[i
].cand
== NULL
)
2906 for (i
= 0; i
< s
; i
++)
2907 if (use
->cost_map
[i
].cand
== cand
)
2908 return use
->cost_map
+ i
;
2909 else if (use
->cost_map
[i
].cand
== NULL
)
2915 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2917 produce_memory_decl_rtl (tree obj
, int *regno
)
2919 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2920 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2924 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2926 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2927 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2928 SET_SYMBOL_REF_DECL (x
, obj
);
2929 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2930 set_mem_addr_space (x
, as
);
2931 targetm
.encode_section_info (obj
, x
, true);
2935 x
= gen_raw_REG (address_mode
, (*regno
)++);
2936 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2937 set_mem_addr_space (x
, as
);
2943 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2944 walk_tree. DATA contains the actual fake register number. */
2947 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2949 tree obj
= NULL_TREE
;
2951 int *regno
= (int *) data
;
2953 switch (TREE_CODE (*expr_p
))
2956 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2957 handled_component_p (*expr_p
);
2958 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2961 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2962 x
= produce_memory_decl_rtl (obj
, regno
);
2967 obj
= SSA_NAME_VAR (*expr_p
);
2968 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2971 if (!DECL_RTL_SET_P (obj
))
2972 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2981 if (DECL_RTL_SET_P (obj
))
2984 if (DECL_MODE (obj
) == BLKmode
)
2985 x
= produce_memory_decl_rtl (obj
, regno
);
2987 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2997 decl_rtl_to_reset
.safe_push (obj
);
2998 SET_DECL_RTL (obj
, x
);
3004 /* Determines cost of the computation of EXPR. */
3007 computation_cost (tree expr
, bool speed
)
3011 tree type
= TREE_TYPE (expr
);
3013 /* Avoid using hard regs in ways which may be unsupported. */
3014 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3015 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3016 enum node_frequency real_frequency
= node
->frequency
;
3018 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3019 crtl
->maybe_hot_insn_p
= speed
;
3020 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3022 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3025 default_rtl_profile ();
3026 node
->frequency
= real_frequency
;
3028 cost
= seq_cost (seq
, speed
);
3030 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3031 TYPE_ADDR_SPACE (type
), speed
);
3032 else if (!REG_P (rslt
))
3033 cost
+= set_src_cost (rslt
, speed
);
3038 /* Returns variable containing the value of candidate CAND at statement AT. */
3041 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3043 if (stmt_after_increment (loop
, cand
, stmt
))
3044 return cand
->var_after
;
3046 return cand
->var_before
;
3049 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3050 same precision that is at least as wide as the precision of TYPE, stores
3051 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3055 determine_common_wider_type (tree
*a
, tree
*b
)
3057 tree wider_type
= NULL
;
3059 tree atype
= TREE_TYPE (*a
);
3061 if (CONVERT_EXPR_P (*a
))
3063 suba
= TREE_OPERAND (*a
, 0);
3064 wider_type
= TREE_TYPE (suba
);
3065 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3071 if (CONVERT_EXPR_P (*b
))
3073 subb
= TREE_OPERAND (*b
, 0);
3074 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3085 /* Determines the expression by that USE is expressed from induction variable
3086 CAND at statement AT in LOOP. The expression is stored in a decomposed
3087 form into AFF. Returns false if USE cannot be expressed using CAND. */
3090 get_computation_aff (struct loop
*loop
,
3091 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3092 struct aff_tree
*aff
)
3094 tree ubase
= use
->iv
->base
;
3095 tree ustep
= use
->iv
->step
;
3096 tree cbase
= cand
->iv
->base
;
3097 tree cstep
= cand
->iv
->step
, cstep_common
;
3098 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3099 tree common_type
, var
;
3101 aff_tree cbase_aff
, var_aff
;
3104 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3106 /* We do not have a precision to express the values of use. */
3110 var
= var_at_stmt (loop
, cand
, at
);
3111 uutype
= unsigned_type_for (utype
);
3113 /* If the conversion is not noop, perform it. */
3114 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3116 cstep
= fold_convert (uutype
, cstep
);
3117 cbase
= fold_convert (uutype
, cbase
);
3118 var
= fold_convert (uutype
, var
);
3121 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3124 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3125 type, we achieve better folding by computing their difference in this
3126 wider type, and cast the result to UUTYPE. We do not need to worry about
3127 overflows, as all the arithmetics will in the end be performed in UUTYPE
3129 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3131 /* use = ubase - ratio * cbase + ratio * var. */
3132 tree_to_aff_combination (ubase
, common_type
, aff
);
3133 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3134 tree_to_aff_combination (var
, uutype
, &var_aff
);
3136 /* We need to shift the value if we are after the increment. */
3137 if (stmt_after_increment (loop
, cand
, at
))
3141 if (common_type
!= uutype
)
3142 cstep_common
= fold_convert (common_type
, cstep
);
3144 cstep_common
= cstep
;
3146 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3147 aff_combination_add (&cbase_aff
, &cstep_aff
);
3150 aff_combination_scale (&cbase_aff
, -rat
);
3151 aff_combination_add (aff
, &cbase_aff
);
3152 if (common_type
!= uutype
)
3153 aff_combination_convert (aff
, uutype
);
3155 aff_combination_scale (&var_aff
, rat
);
3156 aff_combination_add (aff
, &var_aff
);
3161 /* Return the type of USE. */
3164 get_use_type (struct iv_use
*use
)
3166 tree base_type
= TREE_TYPE (use
->iv
->base
);
3169 if (use
->type
== USE_ADDRESS
)
3171 /* The base_type may be a void pointer. Create a pointer type based on
3172 the mem_ref instead. */
3173 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3174 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3175 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3183 /* Determines the expression by that USE is expressed from induction variable
3184 CAND at statement AT in LOOP. The computation is unshared. */
3187 get_computation_at (struct loop
*loop
,
3188 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3191 tree type
= get_use_type (use
);
3193 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3195 unshare_aff_combination (&aff
);
3196 return fold_convert (type
, aff_combination_to_tree (&aff
));
3199 /* Determines the expression by that USE is expressed from induction variable
3200 CAND in LOOP. The computation is unshared. */
3203 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3205 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3208 /* Adjust the cost COST for being in loop setup rather than loop body.
3209 If we're optimizing for space, the loop setup overhead is constant;
3210 if we're optimizing for speed, amortize it over the per-iteration cost. */
3212 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3216 else if (optimize_loop_for_speed_p (data
->current_loop
))
3217 return cost
/ avg_loop_niter (data
->current_loop
);
3222 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3223 validity for a memory reference accessing memory of mode MODE in
3224 address space AS. */
3228 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3231 #define MAX_RATIO 128
3232 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3233 static vec
<sbitmap
> valid_mult_list
;
3236 if (data_index
>= valid_mult_list
.length ())
3237 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3239 valid_mult
= valid_mult_list
[data_index
];
3242 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3243 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3244 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3248 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3249 bitmap_clear (valid_mult
);
3250 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3251 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3252 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3254 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3255 if (memory_address_addr_space_p (mode
, addr
, as
)
3256 || memory_address_addr_space_p (mode
, scaled
, as
))
3257 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3260 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3262 fprintf (dump_file
, " allowed multipliers:");
3263 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3264 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3265 fprintf (dump_file
, " %d", (int) i
);
3266 fprintf (dump_file
, "\n");
3267 fprintf (dump_file
, "\n");
3270 valid_mult_list
[data_index
] = valid_mult
;
3273 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3276 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3279 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3280 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3281 variable is omitted. Compute the cost for a memory reference that accesses
3282 a memory location of mode MEM_MODE in address space AS.
3284 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3285 size of MEM_MODE / RATIO) is available. To make this determination, we
3286 look at the size of the increment to be made, which is given in CSTEP.
3287 CSTEP may be zero if the step is unknown.
3288 STMT_AFTER_INC is true iff the statement we're looking at is after the
3289 increment of the original biv.
3291 TODO -- there must be some better way. This all is quite crude. */
/* Kinds of auto-increment addressing.  The enumerators index the
   ainc_costs array, so their order and AINC_NONE being last matter.  */

enum ainc_type
{
  /* Pre increment.  */
  AINC_PRE_INC,
  /* Pre decrement.  */
  AINC_PRE_DEC,
  /* Post increment.  */
  AINC_POST_INC,
  /* Post decrement.  */
  AINC_POST_DEC,
  /* No auto increment; also the number of auto increment types.  */
  AINC_NONE
};
3302 typedef struct address_cost_data_s
3304 HOST_WIDE_INT min_offset
, max_offset
;
3305 unsigned costs
[2][2][2][2];
3306 unsigned ainc_costs
[AINC_NONE
];
3307 } *address_cost_data
;
3311 get_address_cost (bool symbol_present
, bool var_present
,
3312 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3313 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3314 addr_space_t as
, bool speed
,
3315 bool stmt_after_inc
, bool *may_autoinc
)
3317 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3318 static vec
<address_cost_data
> address_cost_data_list
;
3319 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3320 address_cost_data data
;
3321 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3322 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3323 unsigned cost
, acost
, complexity
;
3324 enum ainc_type autoinc_type
;
3325 bool offset_p
, ratio_p
, autoinc
;
3326 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3327 unsigned HOST_WIDE_INT mask
;
3330 if (data_index
>= address_cost_data_list
.length ())
3331 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3333 data
= address_cost_data_list
[data_index
];
3337 HOST_WIDE_INT rat
, off
= 0;
3338 int old_cse_not_expected
, width
;
3339 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3344 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3346 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3348 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3349 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3350 width
= HOST_BITS_PER_WIDE_INT
- 1;
3351 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3353 for (i
= width
; i
>= 0; i
--)
3355 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3356 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3357 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3360 data
->min_offset
= (i
== -1? 0 : off
);
3362 for (i
= width
; i
>= 0; i
--)
3364 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3365 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3366 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3368 /* For some strict-alignment targets, the offset must be naturally
3369 aligned. Try an aligned offset if mem_mode is not QImode. */
3370 off
= mem_mode
!= QImode
3371 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3372 - GET_MODE_SIZE (mem_mode
)
3376 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3377 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3383 data
->max_offset
= off
;
3385 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3387 fprintf (dump_file
, "get_address_cost:\n");
3388 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3389 GET_MODE_NAME (mem_mode
),
3391 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3392 GET_MODE_NAME (mem_mode
),
3397 for (i
= 2; i
<= MAX_RATIO
; i
++)
3398 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3404 /* Compute the cost of various addressing modes. */
3406 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3407 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3409 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3410 || USE_STORE_PRE_DECREMENT (mem_mode
))
3412 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3413 has_predec
[mem_mode
]
3414 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3416 if (has_predec
[mem_mode
])
3417 data
->ainc_costs
[AINC_PRE_DEC
]
3418 = address_cost (addr
, mem_mode
, as
, speed
);
3420 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3421 || USE_STORE_POST_DECREMENT (mem_mode
))
3423 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3424 has_postdec
[mem_mode
]
3425 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3427 if (has_postdec
[mem_mode
])
3428 data
->ainc_costs
[AINC_POST_DEC
]
3429 = address_cost (addr
, mem_mode
, as
, speed
);
3431 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3432 || USE_STORE_PRE_DECREMENT (mem_mode
))
3434 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3435 has_preinc
[mem_mode
]
3436 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3438 if (has_preinc
[mem_mode
])
3439 data
->ainc_costs
[AINC_PRE_INC
]
3440 = address_cost (addr
, mem_mode
, as
, speed
);
3442 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3443 || USE_STORE_POST_INCREMENT (mem_mode
))
3445 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3446 has_postinc
[mem_mode
]
3447 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3449 if (has_postinc
[mem_mode
])
3450 data
->ainc_costs
[AINC_POST_INC
]
3451 = address_cost (addr
, mem_mode
, as
, speed
);
3453 for (i
= 0; i
< 16; i
++)
3456 var_p
= (i
>> 1) & 1;
3457 off_p
= (i
>> 2) & 1;
3458 rat_p
= (i
>> 3) & 1;
3462 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3463 gen_int_mode (rat
, address_mode
));
3466 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3470 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3471 /* ??? We can run into trouble with some backends by presenting
3472 it with symbols which haven't been properly passed through
3473 targetm.encode_section_info. By setting the local bit, we
3474 enhance the probability of things working. */
3475 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3478 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3480 (PLUS
, address_mode
, base
,
3481 gen_int_mode (off
, address_mode
)));
3484 base
= gen_int_mode (off
, address_mode
);
3489 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3492 /* To avoid splitting addressing modes, pretend that no cse will
3494 old_cse_not_expected
= cse_not_expected
;
3495 cse_not_expected
= true;
3496 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3497 cse_not_expected
= old_cse_not_expected
;
3501 acost
= seq_cost (seq
, speed
);
3502 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3506 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3509 /* On some targets, it is quite expensive to load symbol to a register,
3510 which makes addresses that contain symbols look much more expensive.
3511 However, the symbol will have to be loaded in any case before the
3512 loop (and quite likely we have it in register already), so it does not
3513 make much sense to penalize them too heavily. So make some final
3514 tweaks for the SYMBOL_PRESENT modes:
3516 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3517 var is cheaper, use this mode with small penalty.
3518 If VAR_PRESENT is true, try whether the mode with
3519 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3520 if this is the case, use it. */
3521 add_c
= add_cost (speed
, address_mode
);
3522 for (i
= 0; i
< 8; i
++)
3525 off_p
= (i
>> 1) & 1;
3526 rat_p
= (i
>> 2) & 1;
3528 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3532 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3533 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3538 fprintf (dump_file
, "Address costs:\n");
3540 for (i
= 0; i
< 16; i
++)
3543 var_p
= (i
>> 1) & 1;
3544 off_p
= (i
>> 2) & 1;
3545 rat_p
= (i
>> 3) & 1;
3547 fprintf (dump_file
, " ");
3549 fprintf (dump_file
, "sym + ");
3551 fprintf (dump_file
, "var + ");
3553 fprintf (dump_file
, "cst + ");
3555 fprintf (dump_file
, "rat * ");
3557 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3558 fprintf (dump_file
, "index costs %d\n", acost
);
3560 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3561 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3562 fprintf (dump_file
, " May include autoinc/dec\n");
3563 fprintf (dump_file
, "\n");
3566 address_cost_data_list
[data_index
] = data
;
3569 bits
= GET_MODE_BITSIZE (address_mode
);
3570 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3572 if ((offset
>> (bits
- 1) & 1))
3577 autoinc_type
= AINC_NONE
;
3578 msize
= GET_MODE_SIZE (mem_mode
);
3579 autoinc_offset
= offset
;
3581 autoinc_offset
+= ratio
* cstep
;
3582 if (symbol_present
|| var_present
|| ratio
!= 1)
3586 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3588 autoinc_type
= AINC_POST_INC
;
3589 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3591 autoinc_type
= AINC_POST_DEC
;
3592 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3594 autoinc_type
= AINC_PRE_INC
;
3595 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3597 autoinc_type
= AINC_PRE_DEC
;
3599 if (autoinc_type
!= AINC_NONE
)
3604 offset_p
= (s_offset
!= 0
3605 && data
->min_offset
<= s_offset
3606 && s_offset
<= data
->max_offset
);
3607 ratio_p
= (ratio
!= 1
3608 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3610 if (ratio
!= 1 && !ratio_p
)
3611 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3613 if (s_offset
&& !offset_p
&& !symbol_present
)
3614 cost
+= add_cost (speed
, address_mode
);
3617 *may_autoinc
= autoinc
;
3619 acost
= data
->ainc_costs
[autoinc_type
];
3621 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3622 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3623 return new_cost (cost
+ acost
, complexity
);
3626 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3627 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3628 calculating the operands of EXPR. Returns true if successful, and returns
3629 the cost in COST. */
3632 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3633 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3636 tree op1
= TREE_OPERAND (expr
, 1);
3637 tree cst
= TREE_OPERAND (mult
, 1);
3638 tree multop
= TREE_OPERAND (mult
, 0);
3639 int m
= exact_log2 (int_cst_value (cst
));
3640 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3641 int as_cost
, sa_cost
;
3644 if (!(m
>= 0 && m
< maxm
))
3647 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3649 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3651 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3652 use that in preference to a shift insn followed by an add insn. */
3653 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3654 ? shiftadd_cost (speed
, mode
, m
)
3656 ? shiftsub1_cost (speed
, mode
, m
)
3657 : shiftsub0_cost (speed
, mode
, m
)));
3659 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3660 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3662 STRIP_NOPS (multop
);
3663 if (!is_gimple_val (multop
))
3664 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3670 /* Estimates cost of forcing expression EXPR into a variable. */
3673 force_expr_to_var_cost (tree expr
, bool speed
)
3675 static bool costs_initialized
= false;
3676 static unsigned integer_cost
[2];
3677 static unsigned symbol_cost
[2];
3678 static unsigned address_cost
[2];
3680 comp_cost cost0
, cost1
, cost
;
3683 if (!costs_initialized
)
3685 tree type
= build_pointer_type (integer_type_node
);
3690 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3691 TREE_STATIC (var
) = 1;
3692 x
= produce_memory_decl_rtl (var
, NULL
);
3693 SET_DECL_RTL (var
, x
);
3695 addr
= build1 (ADDR_EXPR
, type
, var
);
3698 for (i
= 0; i
< 2; i
++)
3700 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3703 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3706 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3707 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3709 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3710 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3711 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3712 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3713 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3714 fprintf (dump_file
, "\n");
3718 costs_initialized
= true;
3723 if (SSA_VAR_P (expr
))
3726 if (is_gimple_min_invariant (expr
))
3728 if (TREE_CODE (expr
) == INTEGER_CST
)
3729 return new_cost (integer_cost
[speed
], 0);
3731 if (TREE_CODE (expr
) == ADDR_EXPR
)
3733 tree obj
= TREE_OPERAND (expr
, 0);
3735 if (TREE_CODE (obj
) == VAR_DECL
3736 || TREE_CODE (obj
) == PARM_DECL
3737 || TREE_CODE (obj
) == RESULT_DECL
)
3738 return new_cost (symbol_cost
[speed
], 0);
3741 return new_cost (address_cost
[speed
], 0);
3744 switch (TREE_CODE (expr
))
3746 case POINTER_PLUS_EXPR
:
3750 op0
= TREE_OPERAND (expr
, 0);
3751 op1
= TREE_OPERAND (expr
, 1);
3758 op0
= TREE_OPERAND (expr
, 0);
3764 /* Just an arbitrary value, FIXME. */
3765 return new_cost (target_spill_cost
[speed
], 0);
3768 if (op0
== NULL_TREE
3769 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3772 cost0
= force_expr_to_var_cost (op0
, speed
);
3774 if (op1
== NULL_TREE
3775 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3778 cost1
= force_expr_to_var_cost (op1
, speed
);
3780 mode
= TYPE_MODE (TREE_TYPE (expr
));
3781 switch (TREE_CODE (expr
))
3783 case POINTER_PLUS_EXPR
:
3787 cost
= new_cost (add_cost (speed
, mode
), 0);
3788 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3790 tree mult
= NULL_TREE
;
3792 if (TREE_CODE (op1
) == MULT_EXPR
)
3794 else if (TREE_CODE (op0
) == MULT_EXPR
)
3797 if (mult
!= NULL_TREE
3798 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3799 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3807 tree inner_mode
, outer_mode
;
3808 outer_mode
= TREE_TYPE (expr
);
3809 inner_mode
= TREE_TYPE (op0
);
3810 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3811 TYPE_MODE (inner_mode
), speed
), 0);
3816 if (cst_and_fits_in_hwi (op0
))
3817 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3819 else if (cst_and_fits_in_hwi (op1
))
3820 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3823 return new_cost (target_spill_cost
[speed
], 0);
3830 cost
= add_costs (cost
, cost0
);
3831 cost
= add_costs (cost
, cost1
);
3833 /* Bound the cost by target_spill_cost. The parts of complicated
3834 computations often are either loop invariant or at least can
3835 be shared between several iv uses, so letting this grow without
3836 limits would not give reasonable results. */
3837 if (cost
.cost
> (int) target_spill_cost
[speed
])
3838 cost
.cost
= target_spill_cost
[speed
];
3843 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3844 invariants the computation depends on. */
3847 force_var_cost (struct ivopts_data
*data
,
3848 tree expr
, bitmap
*depends_on
)
3852 fd_ivopts_data
= data
;
3853 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3856 return force_expr_to_var_cost (expr
, data
->speed
);
3859 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3860 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3861 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3862 invariants the computation depends on. */
3865 split_address_cost (struct ivopts_data
*data
,
3866 tree addr
, bool *symbol_present
, bool *var_present
,
3867 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3870 HOST_WIDE_INT bitsize
;
3871 HOST_WIDE_INT bitpos
;
3874 int unsignedp
, volatilep
;
3876 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3877 &unsignedp
, &volatilep
, false);
3880 || bitpos
% BITS_PER_UNIT
!= 0
3881 || TREE_CODE (core
) != VAR_DECL
)
3883 *symbol_present
= false;
3884 *var_present
= true;
3885 fd_ivopts_data
= data
;
3886 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3887 return new_cost (target_spill_cost
[data
->speed
], 0);
3890 *offset
+= bitpos
/ BITS_PER_UNIT
;
3891 if (TREE_STATIC (core
)
3892 || DECL_EXTERNAL (core
))
3894 *symbol_present
= true;
3895 *var_present
= false;
3899 *symbol_present
= false;
3900 *var_present
= true;
3904 /* Estimates cost of expressing difference of addresses E1 - E2 as
3905 var + symbol + offset. The value of offset is added to OFFSET,
3906 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3907 part is missing. DEPENDS_ON is a set of the invariants the computation
3911 ptr_difference_cost (struct ivopts_data
*data
,
3912 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3913 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3915 HOST_WIDE_INT diff
= 0;
3916 aff_tree aff_e1
, aff_e2
;
3919 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3921 if (ptr_difference_const (e1
, e2
, &diff
))
3924 *symbol_present
= false;
3925 *var_present
= false;
3929 if (integer_zerop (e2
))
3930 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3931 symbol_present
, var_present
, offset
, depends_on
);
3933 *symbol_present
= false;
3934 *var_present
= true;
3936 type
= signed_type_for (TREE_TYPE (e1
));
3937 tree_to_aff_combination (e1
, type
, &aff_e1
);
3938 tree_to_aff_combination (e2
, type
, &aff_e2
);
3939 aff_combination_scale (&aff_e2
, -1);
3940 aff_combination_add (&aff_e1
, &aff_e2
);
3942 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3945 /* Estimates cost of expressing difference E1 - E2 as
3946 var + symbol + offset. The value of offset is added to OFFSET,
3947 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3948 part is missing. DEPENDS_ON is a set of the invariants the computation
3952 difference_cost (struct ivopts_data
*data
,
3953 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3954 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3956 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3957 unsigned HOST_WIDE_INT off1
, off2
;
3958 aff_tree aff_e1
, aff_e2
;
3961 e1
= strip_offset (e1
, &off1
);
3962 e2
= strip_offset (e2
, &off2
);
3963 *offset
+= off1
- off2
;
3968 if (TREE_CODE (e1
) == ADDR_EXPR
)
3969 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3970 offset
, depends_on
);
3971 *symbol_present
= false;
3973 if (operand_equal_p (e1
, e2
, 0))
3975 *var_present
= false;
3979 *var_present
= true;
3981 if (integer_zerop (e2
))
3982 return force_var_cost (data
, e1
, depends_on
);
3984 if (integer_zerop (e1
))
3986 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3987 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3991 type
= signed_type_for (TREE_TYPE (e1
));
3992 tree_to_aff_combination (e1
, type
, &aff_e1
);
3993 tree_to_aff_combination (e2
, type
, &aff_e2
);
3994 aff_combination_scale (&aff_e2
, -1);
3995 aff_combination_add (&aff_e1
, &aff_e2
);
3997 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4000 /* Returns true if AFF1 and AFF2 are identical. */
4003 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4007 if (aff1
->n
!= aff2
->n
)
4010 for (i
= 0; i
< aff1
->n
; i
++)
4012 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4015 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4021 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4024 get_expr_id (struct ivopts_data
*data
, tree expr
)
4026 struct iv_inv_expr_ent ent
;
4027 struct iv_inv_expr_ent
**slot
;
4030 ent
.hash
= iterative_hash_expr (expr
, 0);
4031 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4035 *slot
= XNEW (struct iv_inv_expr_ent
);
4036 (*slot
)->expr
= expr
;
4037 (*slot
)->hash
= ent
.hash
;
4038 (*slot
)->id
= data
->inv_expr_id
++;
4042 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4043 requires a new compiler generated temporary. Returns -1 otherwise.
4044 ADDRESS_P is a flag indicating if the expression is for address
4048 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4049 tree cbase
, HOST_WIDE_INT ratio
,
4052 aff_tree ubase_aff
, cbase_aff
;
4060 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4061 && (TREE_CODE (cbase
) == INTEGER_CST
))
4064 /* Strips the constant part. */
4065 if (TREE_CODE (ubase
) == PLUS_EXPR
4066 || TREE_CODE (ubase
) == MINUS_EXPR
4067 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4069 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4070 ubase
= TREE_OPERAND (ubase
, 0);
4073 /* Strips the constant part. */
4074 if (TREE_CODE (cbase
) == PLUS_EXPR
4075 || TREE_CODE (cbase
) == MINUS_EXPR
4076 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4078 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4079 cbase
= TREE_OPERAND (cbase
, 0);
4084 if (((TREE_CODE (ubase
) == SSA_NAME
)
4085 || (TREE_CODE (ubase
) == ADDR_EXPR
4086 && is_gimple_min_invariant (ubase
)))
4087 && (TREE_CODE (cbase
) == INTEGER_CST
))
4090 if (((TREE_CODE (cbase
) == SSA_NAME
)
4091 || (TREE_CODE (cbase
) == ADDR_EXPR
4092 && is_gimple_min_invariant (cbase
)))
4093 && (TREE_CODE (ubase
) == INTEGER_CST
))
4099 if (operand_equal_p (ubase
, cbase
, 0))
4102 if (TREE_CODE (ubase
) == ADDR_EXPR
4103 && TREE_CODE (cbase
) == ADDR_EXPR
)
4107 usym
= TREE_OPERAND (ubase
, 0);
4108 csym
= TREE_OPERAND (cbase
, 0);
4109 if (TREE_CODE (usym
) == ARRAY_REF
)
4111 tree ind
= TREE_OPERAND (usym
, 1);
4112 if (TREE_CODE (ind
) == INTEGER_CST
4113 && tree_fits_shwi_p (ind
)
4114 && tree_to_shwi (ind
) == 0)
4115 usym
= TREE_OPERAND (usym
, 0);
4117 if (TREE_CODE (csym
) == ARRAY_REF
)
4119 tree ind
= TREE_OPERAND (csym
, 1);
4120 if (TREE_CODE (ind
) == INTEGER_CST
4121 && tree_fits_shwi_p (ind
)
4122 && tree_to_shwi (ind
) == 0)
4123 csym
= TREE_OPERAND (csym
, 0);
4125 if (operand_equal_p (usym
, csym
, 0))
4128 /* Now do more complex comparison */
4129 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4130 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4131 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4135 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4136 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4138 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4139 aff_combination_add (&ubase_aff
, &cbase_aff
);
4140 expr
= aff_combination_to_tree (&ubase_aff
);
4141 return get_expr_id (data
, expr
);
4146 /* Determines the cost of the computation by which USE is expressed
4147 from induction variable CAND. If ADDRESS_P is true, we just need
4148 to create an address from it, otherwise we want to get it into
4149 register. A set of invariants we depend on is stored in
4150 DEPENDS_ON. AT is the statement at which the value is computed.
4151 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4152 addressing is likely. */
4155 get_computation_cost_at (struct ivopts_data
*data
,
4156 struct iv_use
*use
, struct iv_cand
*cand
,
4157 bool address_p
, bitmap
*depends_on
, gimple at
,
4161 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4163 tree utype
= TREE_TYPE (ubase
), ctype
;
4164 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4165 HOST_WIDE_INT ratio
, aratio
;
4166 bool var_present
, symbol_present
, stmt_is_after_inc
;
4169 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4170 machine_mode mem_mode
= (address_p
4171 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4176 /* Only consider real candidates. */
4178 return infinite_cost
;
4180 cbase
= cand
->iv
->base
;
4181 cstep
= cand
->iv
->step
;
4182 ctype
= TREE_TYPE (cbase
);
4184 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4186 /* We do not have a precision to express the values of use. */
4187 return infinite_cost
;
4191 || (use
->iv
->base_object
4192 && cand
->iv
->base_object
4193 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4194 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4196 /* Do not try to express address of an object with computation based
4197 on address of a different object. This may cause problems in rtl
4198 level alias analysis (that does not expect this to be happening,
4199 as this is illegal in C), and would be unlikely to be useful
4201 if (use
->iv
->base_object
4202 && cand
->iv
->base_object
4203 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4204 return infinite_cost
;
4207 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4209 /* TODO -- add direct handling of this case. */
4213 /* CSTEPI is removed from the offset in case statement is after the
4214 increment. If the step is not constant, we use zero instead.
4215 This is a bit imprecise (there is the extra addition), but
4216 redundancy elimination is likely to transform the code so that
4217 it uses value of the variable before increment anyway,
4218 so it is not that unrealistic. */
4219 if (cst_and_fits_in_hwi (cstep
))
4220 cstepi
= int_cst_value (cstep
);
4224 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4225 return infinite_cost
;
4227 if (wi::fits_shwi_p (rat
))
4228 ratio
= rat
.to_shwi ();
4230 return infinite_cost
;
4233 ctype
= TREE_TYPE (cbase
);
4235 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4237 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4238 or ratio == 1, it is better to handle this like
4240 ubase - ratio * cbase + ratio * var
4242 (also holds in the case ratio == -1, TODO). */
4244 if (cst_and_fits_in_hwi (cbase
))
4246 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4247 cost
= difference_cost (data
,
4248 ubase
, build_int_cst (utype
, 0),
4249 &symbol_present
, &var_present
, &offset
,
4251 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4253 else if (ratio
== 1)
4255 tree real_cbase
= cbase
;
4257 /* Check to see if any adjustment is needed. */
4258 if (cstepi
== 0 && stmt_is_after_inc
)
4260 aff_tree real_cbase_aff
;
4263 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4265 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4267 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4268 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4271 cost
= difference_cost (data
,
4273 &symbol_present
, &var_present
, &offset
,
4275 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4278 && !POINTER_TYPE_P (ctype
)
4279 && multiplier_allowed_in_address_p
4281 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4284 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4285 cost
= difference_cost (data
,
4287 &symbol_present
, &var_present
, &offset
,
4289 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4293 cost
= force_var_cost (data
, cbase
, depends_on
);
4294 cost
= add_costs (cost
,
4295 difference_cost (data
,
4296 ubase
, build_int_cst (utype
, 0),
4297 &symbol_present
, &var_present
,
4298 &offset
, depends_on
));
4299 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4300 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4306 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4307 /* Clear depends on. */
4308 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4309 bitmap_clear (*depends_on
);
4312 /* If we are after the increment, the value of the candidate is higher by
4314 if (stmt_is_after_inc
)
4315 offset
-= ratio
* cstepi
;
4317 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4318 (symbol/var1/const parts may be omitted). If we are looking for an
4319 address, find the cost of addressing this. */
4321 return add_costs (cost
,
4322 get_address_cost (symbol_present
, var_present
,
4323 offset
, ratio
, cstepi
,
4325 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4326 speed
, stmt_is_after_inc
,
4329 /* Otherwise estimate the costs for computing the expression. */
4330 if (!symbol_present
&& !var_present
&& !offset
)
4333 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4337 /* Symbol + offset should be compile-time computable so consider that they
4338 are added once to the variable, if present. */
4339 if (var_present
&& (symbol_present
|| offset
))
4340 cost
.cost
+= adjust_setup_cost (data
,
4341 add_cost (speed
, TYPE_MODE (ctype
)));
4343 /* Having offset does not affect runtime cost in case it is added to
4344 symbol, but it increases complexity. */
4348 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4350 aratio
= ratio
> 0 ? ratio
: -ratio
;
4352 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4357 *can_autoinc
= false;
4360 /* Just get the expression, expand it and measure the cost. */
4361 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4364 return infinite_cost
;
4367 comp
= build_simple_mem_ref (comp
);
4369 return new_cost (computation_cost (comp
, speed
), 0);
4373 /* Determines the cost of the computation by which USE is expressed
4374 from induction variable CAND. If ADDRESS_P is true, we just need
4375 to create an address from it, otherwise we want to get it into
4376 register. A set of invariants we depend on is stored in
4377 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4378 autoinc addressing is likely. */
4381 get_computation_cost (struct ivopts_data
*data
,
4382 struct iv_use
*use
, struct iv_cand
*cand
,
4383 bool address_p
, bitmap
*depends_on
,
4384 bool *can_autoinc
, int *inv_expr_id
)
4386 return get_computation_cost_at (data
,
4387 use
, cand
, address_p
, depends_on
, use
->stmt
,
4388 can_autoinc
, inv_expr_id
);
4391 /* Determines cost of basing replacement of USE on CAND in a generic
4395 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4396 struct iv_use
*use
, struct iv_cand
*cand
)
4400 int inv_expr_id
= -1;
4402 /* The simple case first -- if we need to express value of the preserved
4403 original biv, the cost is 0. This also prevents us from counting the
4404 cost of increment twice -- once at this use and once in the cost of
4406 if (cand
->pos
== IP_ORIGINAL
4407 && cand
->incremented_at
== use
->stmt
)
4409 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4414 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4415 NULL
, &inv_expr_id
);
4417 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4420 return !infinite_cost_p (cost
);
4423 /* Determines cost of basing replacement of USE on CAND in an address. */
4426 determine_use_iv_cost_address (struct ivopts_data
*data
,
4427 struct iv_use
*use
, struct iv_cand
*cand
)
4431 int inv_expr_id
= -1;
4432 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4433 &can_autoinc
, &inv_expr_id
);
4435 if (cand
->ainc_use
== use
)
4438 cost
.cost
-= cand
->cost_step
;
4439 /* If we generated the candidate solely for exploiting autoincrement
4440 opportunities, and it turns out it can't be used, set the cost to
4441 infinity to make sure we ignore it. */
4442 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4443 cost
= infinite_cost
;
4445 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4448 return !infinite_cost_p (cost
);
4451 /* Computes value of candidate CAND at position AT in iteration NITER, and
4452 stores it to VAL. */
4455 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4458 aff_tree step
, delta
, nit
;
4459 struct iv
*iv
= cand
->iv
;
4460 tree type
= TREE_TYPE (iv
->base
);
4461 tree steptype
= type
;
4462 if (POINTER_TYPE_P (type
))
4463 steptype
= sizetype
;
4464 steptype
= unsigned_type_for (type
);
4466 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4467 aff_combination_convert (&step
, steptype
);
4468 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4469 aff_combination_convert (&nit
, steptype
);
4470 aff_combination_mult (&nit
, &step
, &delta
);
4471 if (stmt_after_increment (loop
, cand
, at
))
4472 aff_combination_add (&delta
, &step
);
4474 tree_to_aff_combination (iv
->base
, type
, val
);
4475 if (!POINTER_TYPE_P (type
))
4476 aff_combination_convert (val
, steptype
);
4477 aff_combination_add (val
, &delta
);
4480 /* Returns period of induction variable iv. */
4483 iv_period (struct iv
*iv
)
4485 tree step
= iv
->step
, period
, type
;
4488 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4490 type
= unsigned_type_for (TREE_TYPE (step
));
4491 /* Period of the iv is lcm (step, type_range)/step -1,
4492 i.e., N*type_range/step - 1. Since type range is power
4493 of two, N == (step >> num_of_ending_zeros_binary (step)),
4494 so the final result is
4496 (type_range >> num_of_ending_zeros_binary (step)) - 1
4499 pow2div
= num_ending_zeros (step
);
4501 period
= build_low_bits_mask (type
,
4502 (TYPE_PRECISION (type
)
4503 - tree_to_uhwi (pow2div
)));
4508 /* Returns the comparison operator used when eliminating the iv USE. */
4510 static enum tree_code
4511 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4513 struct loop
*loop
= data
->current_loop
;
4517 ex_bb
= gimple_bb (use
->stmt
);
4518 exit
= EDGE_SUCC (ex_bb
, 0);
4519 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4520 exit
= EDGE_SUCC (ex_bb
, 1);
4522 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4525 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4526 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4527 calculation is performed in non-wrapping type.
4529 TODO: More generally, we could test for the situation that
4530 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4531 This would require knowing the sign of OFFSET. */
4534 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4536 enum tree_code code
;
4538 aff_tree aff_e1
, aff_e2
, aff_offset
;
4540 if (!nowrap_type_p (TREE_TYPE (base
)))
4543 base
= expand_simple_operations (base
);
4545 if (TREE_CODE (base
) == SSA_NAME
)
4547 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4549 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4552 code
= gimple_assign_rhs_code (stmt
);
4553 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4556 e1
= gimple_assign_rhs1 (stmt
);
4557 e2
= gimple_assign_rhs2 (stmt
);
4561 code
= TREE_CODE (base
);
4562 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4564 e1
= TREE_OPERAND (base
, 0);
4565 e2
= TREE_OPERAND (base
, 1);
4568 /* Use affine expansion as deeper inspection to prove the equality. */
4569 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4570 &aff_e2
, &data
->name_expansion_cache
);
4571 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4572 &aff_offset
, &data
->name_expansion_cache
);
4573 aff_combination_scale (&aff_offset
, -1);
4577 aff_combination_add (&aff_e2
, &aff_offset
);
4578 if (aff_combination_zero_p (&aff_e2
))
4581 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4582 &aff_e1
, &data
->name_expansion_cache
);
4583 aff_combination_add (&aff_e1
, &aff_offset
);
4584 return aff_combination_zero_p (&aff_e1
);
4586 case POINTER_PLUS_EXPR
:
4587 aff_combination_add (&aff_e2
, &aff_offset
);
4588 return aff_combination_zero_p (&aff_e2
);
4595 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4596 comparison with CAND. NITER describes the number of iterations of
4597 the loops. If successful, the comparison in COMP_P is altered accordingly.
4599 We aim to handle the following situation:
4615 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4616 We aim to optimize this to
4624 while (p < p_0 - a + b);
4626 This preserves correctness, since pointer arithmetic does not
4627 overflow. More precisely:
4629 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4630 overflow in computing it or the values of p.
4631 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4632 overflow. To prove this, we use the fact that p_0 = base + a. */
4635 iv_elimination_compare_lt (struct ivopts_data
*data
,
4636 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4637 struct tree_niter_desc
*niter
)
4639 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4640 struct aff_tree nit
, tmpa
, tmpb
;
4641 enum tree_code comp
;
4644 /* We need to know that the candidate induction variable does not overflow.
4645 While more complex analysis may be used to prove this, for now just
4646 check that the variable appears in the original program and that it
4647 is computed in a type that guarantees no overflows. */
4648 cand_type
= TREE_TYPE (cand
->iv
->base
);
4649 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4652 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4653 the calculation of the BOUND could overflow, making the comparison
4655 if (!data
->loop_single_exit_p
)
4658 /* We need to be able to decide whether candidate is increasing or decreasing
4659 in order to choose the right comparison operator. */
4660 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4662 step
= int_cst_value (cand
->iv
->step
);
4664 /* Check that the number of iterations matches the expected pattern:
4665 a + 1 > b ? 0 : b - a - 1. */
4666 mbz
= niter
->may_be_zero
;
4667 if (TREE_CODE (mbz
) == GT_EXPR
)
4669 /* Handle a + 1 > b. */
4670 tree op0
= TREE_OPERAND (mbz
, 0);
4671 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4673 a
= TREE_OPERAND (op0
, 0);
4674 b
= TREE_OPERAND (mbz
, 1);
4679 else if (TREE_CODE (mbz
) == LT_EXPR
)
4681 tree op1
= TREE_OPERAND (mbz
, 1);
4683 /* Handle b < a + 1. */
4684 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4686 a
= TREE_OPERAND (op1
, 0);
4687 b
= TREE_OPERAND (mbz
, 0);
4695 /* Expected number of iterations is B - A - 1. Check that it matches
4696 the actual number, i.e., that B - A - NITER = 1. */
4697 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4698 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4699 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4700 aff_combination_scale (&nit
, -1);
4701 aff_combination_scale (&tmpa
, -1);
4702 aff_combination_add (&tmpb
, &tmpa
);
4703 aff_combination_add (&tmpb
, &nit
);
4704 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4707 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4709 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4711 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4712 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4715 /* Determine the new comparison operator. */
4716 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4717 if (*comp_p
== NE_EXPR
)
4719 else if (*comp_p
== EQ_EXPR
)
4720 *comp_p
= invert_tree_comparison (comp
, false);
4727 /* Check whether it is possible to express the condition in USE by comparison
4728 of candidate CAND. If so, store the value compared with to BOUND, and the
4729 comparison operator to COMP. */
4732 may_eliminate_iv (struct ivopts_data
*data
,
4733 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4734 enum tree_code
*comp
)
4739 struct loop
*loop
= data
->current_loop
;
4741 struct tree_niter_desc
*desc
= NULL
;
4743 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4746 /* For now works only for exits that dominate the loop latch.
4747 TODO: extend to other conditions inside loop body. */
4748 ex_bb
= gimple_bb (use
->stmt
);
4749 if (use
->stmt
!= last_stmt (ex_bb
)
4750 || gimple_code (use
->stmt
) != GIMPLE_COND
4751 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4754 exit
= EDGE_SUCC (ex_bb
, 0);
4755 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4756 exit
= EDGE_SUCC (ex_bb
, 1);
4757 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4760 desc
= niter_for_exit (data
, exit
);
4764 /* Determine whether we can use the variable to test the exit condition.
4765 This is the case iff the period of the induction variable is greater
4766 than the number of iterations for which the exit condition is true. */
4767 period
= iv_period (cand
->iv
);
4769 /* If the number of iterations is constant, compare against it directly. */
4770 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4772 /* See cand_value_at. */
4773 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4775 if (!tree_int_cst_lt (desc
->niter
, period
))
4780 if (tree_int_cst_lt (period
, desc
->niter
))
4785 /* If not, and if this is the only possible exit of the loop, see whether
4786 we can get a conservative estimate on the number of iterations of the
4787 entire loop and compare against that instead. */
4790 widest_int period_value
, max_niter
;
4792 max_niter
= desc
->max
;
4793 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4795 period_value
= wi::to_widest (period
);
4796 if (wi::gtu_p (max_niter
, period_value
))
4798 /* See if we can take advantage of inferred loop bound information. */
4799 if (data
->loop_single_exit_p
)
4801 if (!max_loop_iterations (loop
, &max_niter
))
4803 /* The loop bound is already adjusted by adding 1. */
4804 if (wi::gtu_p (max_niter
, period_value
))
4812 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4814 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4815 aff_combination_to_tree (&bnd
));
4816 *comp
= iv_elimination_compare (data
, use
);
4818 /* It is unlikely that computing the number of iterations using division
4819 would be more profitable than keeping the original induction variable. */
4820 if (expression_expensive_p (*bound
))
4823 /* Sometimes, it is possible to handle the situation that the number of
4824 iterations may be zero unless additional assumptions hold by using <
4825 instead of != in the exit condition.
4827 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4828 base the exit condition on it. However, that is often too
4830 if (!integer_zerop (desc
->may_be_zero
))
4831 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4836 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4837 be copied, if it is used in the loop body and DATA->body_includes_call. */
4840 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4842 tree sbound
= bound
;
4843 STRIP_NOPS (sbound
);
4845 if (TREE_CODE (sbound
) == SSA_NAME
4846 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4847 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4848 && data
->body_includes_call
)
4849 return COSTS_N_INSNS (1);
4854 /* Determines cost of basing replacement of USE on CAND in a condition. */
4857 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4858 struct iv_use
*use
, struct iv_cand
*cand
)
4860 tree bound
= NULL_TREE
;
4862 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4863 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4865 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4866 tree
*control_var
, *bound_cst
;
4867 enum tree_code comp
= ERROR_MARK
;
4869 /* Only consider real candidates. */
4872 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4877 /* Try iv elimination. */
4878 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4880 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4881 if (elim_cost
.cost
== 0)
4882 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4883 else if (TREE_CODE (bound
) == INTEGER_CST
)
4885 /* If we replace a loop condition 'i < n' with 'p < base + n',
4886 depends_on_elim will have 'base' and 'n' set, which implies
4887 that both 'base' and 'n' will be live during the loop. More likely,
4888 'base + n' will be loop invariant, resulting in only one live value
4889 during the loop. So in that case we clear depends_on_elim and set
4890 elim_inv_expr_id instead. */
4891 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4893 elim_inv_expr_id
= get_expr_id (data
, bound
);
4894 bitmap_clear (depends_on_elim
);
4896 /* The bound is a loop invariant, so it will be only computed
4898 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4901 elim_cost
= infinite_cost
;
4903 /* Try expressing the original giv. If it is compared with an invariant,
4904 note that we cannot get rid of it. */
4905 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4909 /* When the condition is a comparison of the candidate IV against
4910 zero, prefer this IV.
4912 TODO: The constant that we're subtracting from the cost should
4913 be target-dependent. This information should be added to the
4914 target costs for each backend. */
4915 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4916 && integer_zerop (*bound_cst
)
4917 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4918 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4919 elim_cost
.cost
-= 1;
4921 express_cost
= get_computation_cost (data
, use
, cand
, false,
4922 &depends_on_express
, NULL
,
4923 &express_inv_expr_id
);
4924 fd_ivopts_data
= data
;
4925 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4927 /* Count the cost of the original bound as well. */
4928 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4929 if (bound_cost
.cost
== 0)
4930 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4931 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4932 bound_cost
.cost
= 0;
4933 express_cost
.cost
+= bound_cost
.cost
;
4935 /* Choose the better approach, preferring the eliminated IV. */
4936 if (compare_costs (elim_cost
, express_cost
) <= 0)
4939 depends_on
= depends_on_elim
;
4940 depends_on_elim
= NULL
;
4941 inv_expr_id
= elim_inv_expr_id
;
4945 cost
= express_cost
;
4946 depends_on
= depends_on_express
;
4947 depends_on_express
= NULL
;
4950 inv_expr_id
= express_inv_expr_id
;
4953 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4955 if (depends_on_elim
)
4956 BITMAP_FREE (depends_on_elim
);
4957 if (depends_on_express
)
4958 BITMAP_FREE (depends_on_express
);
4960 return !infinite_cost_p (cost
);
4963 /* Determines cost of basing replacement of USE on CAND. Returns false
4964 if USE cannot be based on CAND. */
4967 determine_use_iv_cost (struct ivopts_data
*data
,
4968 struct iv_use
*use
, struct iv_cand
*cand
)
4972 case USE_NONLINEAR_EXPR
:
4973 return determine_use_iv_cost_generic (data
, use
, cand
);
4976 return determine_use_iv_cost_address (data
, use
, cand
);
4979 return determine_use_iv_cost_condition (data
, use
, cand
);
4986 /* Return true if get_computation_cost indicates that autoincrement is
4987 a possibility for the pair of USE and CAND, false otherwise. */
4990 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4991 struct iv_cand
*cand
)
4997 if (use
->type
!= USE_ADDRESS
)
5000 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5001 &can_autoinc
, NULL
);
5003 BITMAP_FREE (depends_on
);
5005 return !infinite_cost_p (cost
) && can_autoinc
;
5008 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5009 use that allows autoincrement, and set their AINC_USE if possible. */
5012 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5016 for (i
= 0; i
< n_iv_cands (data
); i
++)
5018 struct iv_cand
*cand
= iv_cand (data
, i
);
5019 struct iv_use
*closest_before
= NULL
;
5020 struct iv_use
*closest_after
= NULL
;
5021 if (cand
->pos
!= IP_ORIGINAL
)
5024 for (j
= 0; j
< n_iv_uses (data
); j
++)
5026 struct iv_use
*use
= iv_use (data
, j
);
5027 unsigned uid
= gimple_uid (use
->stmt
);
5029 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5032 if (uid
< gimple_uid (cand
->incremented_at
)
5033 && (closest_before
== NULL
5034 || uid
> gimple_uid (closest_before
->stmt
)))
5035 closest_before
= use
;
5037 if (uid
> gimple_uid (cand
->incremented_at
)
5038 && (closest_after
== NULL
5039 || uid
< gimple_uid (closest_after
->stmt
)))
5040 closest_after
= use
;
5043 if (closest_before
!= NULL
5044 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5045 cand
->ainc_use
= closest_before
;
5046 else if (closest_after
!= NULL
5047 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5048 cand
->ainc_use
= closest_after
;
5052 /* Finds the candidates for the induction variables. */
5055 find_iv_candidates (struct ivopts_data
*data
)
5057 /* Add commonly used ivs. */
5058 add_standard_iv_candidates (data
);
5060 /* Add old induction variables. */
5061 add_old_ivs_candidates (data
);
5063 /* Add induction variables derived from uses. */
5064 add_derived_ivs_candidates (data
);
5066 set_autoinc_for_original_candidates (data
);
5068 /* Record the important candidates. */
5069 record_important_candidates (data
);
5072 /* Determines costs of basing the use of the iv on an iv candidate. */
5075 determine_use_iv_costs (struct ivopts_data
*data
)
5079 struct iv_cand
*cand
;
5080 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5082 alloc_use_cost_map (data
);
5084 for (i
= 0; i
< n_iv_uses (data
); i
++)
5086 use
= iv_use (data
, i
);
5088 if (data
->consider_all_candidates
)
5090 for (j
= 0; j
< n_iv_cands (data
); j
++)
5092 cand
= iv_cand (data
, j
);
5093 determine_use_iv_cost (data
, use
, cand
);
5100 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5102 cand
= iv_cand (data
, j
);
5103 if (!determine_use_iv_cost (data
, use
, cand
))
5104 bitmap_set_bit (to_clear
, j
);
5107 /* Remove the candidates for which the cost is infinite from
5108 the list of related candidates. */
5109 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5110 bitmap_clear (to_clear
);
5114 BITMAP_FREE (to_clear
);
5116 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5118 fprintf (dump_file
, "Use-candidate costs:\n");
5120 for (i
= 0; i
< n_iv_uses (data
); i
++)
5122 use
= iv_use (data
, i
);
5124 fprintf (dump_file
, "Use %d:\n", i
);
5125 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5126 for (j
= 0; j
< use
->n_map_members
; j
++)
5128 if (!use
->cost_map
[j
].cand
5129 || infinite_cost_p (use
->cost_map
[j
].cost
))
5132 fprintf (dump_file
, " %d\t%d\t%d\t",
5133 use
->cost_map
[j
].cand
->id
,
5134 use
->cost_map
[j
].cost
.cost
,
5135 use
->cost_map
[j
].cost
.complexity
);
5136 if (use
->cost_map
[j
].depends_on
)
5137 bitmap_print (dump_file
,
5138 use
->cost_map
[j
].depends_on
, "","");
5139 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5140 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5141 fprintf (dump_file
, "\n");
5144 fprintf (dump_file
, "\n");
5146 fprintf (dump_file
, "\n");
5150 /* Determines cost of the candidate CAND. */
5153 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5155 comp_cost cost_base
;
5156 unsigned cost
, cost_step
;
5165 /* There are two costs associated with the candidate -- its increment
5166 and its initialization. The second is almost negligible for any loop
5167 that rolls enough, so we take it just very little into account. */
5169 base
= cand
->iv
->base
;
5170 cost_base
= force_var_cost (data
, base
, NULL
);
5171 /* It will be exceptional that the iv register happens to be initialized with
5172 the proper value at no cost. In general, there will at least be a regcopy
5174 if (cost_base
.cost
== 0)
5175 cost_base
.cost
= COSTS_N_INSNS (1);
5176 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5178 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5180 /* Prefer the original ivs unless we may gain something by replacing it.
5181 The reason is to make debugging simpler; so this is not relevant for
5182 artificial ivs created by other optimization passes. */
5183 if (cand
->pos
!= IP_ORIGINAL
5184 || !SSA_NAME_VAR (cand
->var_before
)
5185 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5188 /* Prefer not to insert statements into latch unless there are some
5189 already (so that we do not create unnecessary jumps). */
5190 if (cand
->pos
== IP_END
5191 && empty_block_p (ip_end_pos (data
->current_loop
)))
5195 cand
->cost_step
= cost_step
;
5198 /* Determines costs of computation of the candidates. */
5201 determine_iv_costs (struct ivopts_data
*data
)
5205 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5207 fprintf (dump_file
, "Candidate costs:\n");
5208 fprintf (dump_file
, " cand\tcost\n");
5211 for (i
= 0; i
< n_iv_cands (data
); i
++)
5213 struct iv_cand
*cand
= iv_cand (data
, i
);
5215 determine_iv_cost (data
, cand
);
5217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5218 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5222 fprintf (dump_file
, "\n");
5225 /* Calculates cost for having SIZE induction variables. */
5228 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5230 /* We add size to the cost, so that we prefer eliminating ivs
5231 if possible. */
5232 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5233 data
->body_includes_call
);
5236 /* For each size of the induction variable set determine the penalty. */
5239 determine_set_costs (struct ivopts_data
*data
)
5245 struct loop
*loop
= data
->current_loop
;
5248 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5250 fprintf (dump_file
, "Global costs:\n");
5251 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5252 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5253 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5254 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5258 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5261 op
= PHI_RESULT (phi
);
5263 if (virtual_operand_p (op
))
5266 if (get_iv (data
, op
))
5272 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5274 struct version_info
*info
= ver_info (data
, j
);
5276 if (info
->inv_id
&& info
->has_nonlin_use
)
5280 data
->regs_used
= n
;
5281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5282 fprintf (dump_file
, " regs_used %d\n", n
);
5284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5286 fprintf (dump_file
, " cost for size:\n");
5287 fprintf (dump_file
, " ivs\tcost\n");
5288 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5289 fprintf (dump_file
, " %d\t%d\n", j
,
5290 ivopts_global_cost_for_size (data
, j
));
5291 fprintf (dump_file
, "\n");
5295 /* Returns true if A is a cheaper cost pair than B. */
5298 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5308 cmp
= compare_costs (a
->cost
, b
->cost
);
5315 /* In case the costs are the same, prefer the cheaper candidate. */
5316 if (a
->cand
->cost
< b
->cand
->cost
)
5323 /* Returns the candidate by which USE is expressed in IVS. */
5325 static struct cost_pair
*
5326 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5328 return ivs
->cand_for_use
[use
->id
];
5331 /* Computes the cost field of IVS structure. */
5334 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5336 comp_cost cost
= ivs
->cand_use_cost
;
5338 cost
.cost
+= ivs
->cand_cost
;
5340 cost
.cost
+= ivopts_global_cost_for_size (data
,
5341 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5346 /* Remove invariants in set INVS from set IVS. */
5349 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5357 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5359 ivs
->n_invariant_uses
[iid
]--;
5360 if (ivs
->n_invariant_uses
[iid
] == 0)
5365 /* Set USE not to be expressed by any candidate in IVS. */
5368 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5371 unsigned uid
= use
->id
, cid
;
5372 struct cost_pair
*cp
;
5374 cp
= ivs
->cand_for_use
[uid
];
5380 ivs
->cand_for_use
[uid
] = NULL
;
5381 ivs
->n_cand_uses
[cid
]--;
5383 if (ivs
->n_cand_uses
[cid
] == 0)
5385 bitmap_clear_bit (ivs
->cands
, cid
);
5386 /* Do not count the pseudocandidates. */
5390 ivs
->cand_cost
-= cp
->cand
->cost
;
5392 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5395 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5397 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5399 if (cp
->inv_expr_id
!= -1)
5401 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5402 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5403 ivs
->num_used_inv_expr
--;
5405 iv_ca_recount_cost (data
, ivs
);
5408 /* Add invariants in set INVS to set IVS. */
5411 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5419 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5421 ivs
->n_invariant_uses
[iid
]++;
5422 if (ivs
->n_invariant_uses
[iid
] == 1)
5427 /* Set cost pair for USE in set IVS to CP. */
5430 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5431 struct iv_use
*use
, struct cost_pair
*cp
)
5433 unsigned uid
= use
->id
, cid
;
5435 if (ivs
->cand_for_use
[uid
] == cp
)
5438 if (ivs
->cand_for_use
[uid
])
5439 iv_ca_set_no_cp (data
, ivs
, use
);
5446 ivs
->cand_for_use
[uid
] = cp
;
5447 ivs
->n_cand_uses
[cid
]++;
5448 if (ivs
->n_cand_uses
[cid
] == 1)
5450 bitmap_set_bit (ivs
->cands
, cid
);
5451 /* Do not count the pseudocandidates. */
5455 ivs
->cand_cost
+= cp
->cand
->cost
;
5457 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5460 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5461 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5463 if (cp
->inv_expr_id
!= -1)
5465 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5466 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5467 ivs
->num_used_inv_expr
++;
5469 iv_ca_recount_cost (data
, ivs
);
5473 /* Extend set IVS by expressing USE by some of the candidates in it
5474 if possible. Consider all important candidates if candidates in
5475 set IVS don't give any result. */
5478 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5481 struct cost_pair
*best_cp
= NULL
, *cp
;
5484 struct iv_cand
*cand
;
5486 gcc_assert (ivs
->upto
>= use
->id
);
5490 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5492 cand
= iv_cand (data
, i
);
5493 cp
= get_use_iv_cost (data
, use
, cand
);
5494 if (cheaper_cost_pair (cp
, best_cp
))
5498 if (best_cp
== NULL
)
5500 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5502 cand
= iv_cand (data
, i
);
5503 cp
= get_use_iv_cost (data
, use
, cand
);
5504 if (cheaper_cost_pair (cp
, best_cp
))
5509 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5512 /* Get cost for assignment IVS. */
5515 iv_ca_cost (struct iv_ca
*ivs
)
5517 /* This was a conditional expression but it triggered a bug in
5518 Sun C 5.5. */
5520 return infinite_cost
;
5525 /* Returns true if all dependences of CP are among invariants in IVS. */
5528 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5533 if (!cp
->depends_on
)
5536 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5538 if (ivs
->n_invariant_uses
[i
] == 0)
5545 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5546 it before NEXT_CHANGE. */
5548 static struct iv_ca_delta
*
5549 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5550 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5552 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5555 change
->old_cp
= old_cp
;
5556 change
->new_cp
= new_cp
;
5557 change
->next_change
= next_change
;
5562 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5563 are rewritten. */
5565 static struct iv_ca_delta
*
5566 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5568 struct iv_ca_delta
*last
;
5576 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5578 last
->next_change
= l2
;
5583 /* Reverse the list of changes DELTA, forming the inverse to it. */
5585 static struct iv_ca_delta
*
5586 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5588 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5589 struct cost_pair
*tmp
;
5591 for (act
= delta
; act
; act
= next
)
5593 next
= act
->next_change
;
5594 act
->next_change
= prev
;
5598 act
->old_cp
= act
->new_cp
;
5605 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5606 reverted instead. */
5609 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5610 struct iv_ca_delta
*delta
, bool forward
)
5612 struct cost_pair
*from
, *to
;
5613 struct iv_ca_delta
*act
;
5616 delta
= iv_ca_delta_reverse (delta
);
5618 for (act
= delta
; act
; act
= act
->next_change
)
5622 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5623 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5627 iv_ca_delta_reverse (delta
);
5630 /* Returns true if CAND is used in IVS. */
5633 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5635 return ivs
->n_cand_uses
[cand
->id
] > 0;
5638 /* Returns number of induction variable candidates in the set IVS. */
5641 iv_ca_n_cands (struct iv_ca
*ivs
)
5643 return ivs
->n_cands
;
5646 /* Free the list of changes DELTA. */
5649 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5651 struct iv_ca_delta
*act
, *next
;
5653 for (act
= *delta
; act
; act
= next
)
5655 next
= act
->next_change
;
5662 /* Allocates new iv candidates assignment. */
5664 static struct iv_ca
*
5665 iv_ca_new (struct ivopts_data
*data
)
5667 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5671 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5672 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5673 nw
->cands
= BITMAP_ALLOC (NULL
);
5676 nw
->cand_use_cost
= no_cost
;
5678 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5680 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5681 nw
->num_used_inv_expr
= 0;
5686 /* Free memory occupied by the set IVS. */
5689 iv_ca_free (struct iv_ca
**ivs
)
5691 free ((*ivs
)->cand_for_use
);
5692 free ((*ivs
)->n_cand_uses
);
5693 BITMAP_FREE ((*ivs
)->cands
);
5694 free ((*ivs
)->n_invariant_uses
);
5695 free ((*ivs
)->used_inv_expr
);
5700 /* Dumps IVS to FILE. */
5703 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5705 const char *pref
= " invariants ";
5707 comp_cost cost
= iv_ca_cost (ivs
);
5709 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5710 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5711 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5712 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5714 for (i
= 0; i
< ivs
->upto
; i
++)
5716 struct iv_use
*use
= iv_use (data
, i
);
5717 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5719 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5720 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5722 fprintf (file
, " use:%d --> ??\n", use
->id
);
5725 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5726 if (ivs
->n_invariant_uses
[i
])
5728 fprintf (file
, "%s%d", pref
, i
);
5731 fprintf (file
, "\n\n");
5734 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5735 new set, and store differences in DELTA. Number of induction variables
5736 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5737 the function will try to find a solution with minimal iv candidates. */
5740 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5741 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5742 unsigned *n_ivs
, bool min_ncand
)
5747 struct cost_pair
*old_cp
, *new_cp
;
5750 for (i
= 0; i
< ivs
->upto
; i
++)
5752 use
= iv_use (data
, i
);
5753 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5756 && old_cp
->cand
== cand
)
5759 new_cp
= get_use_iv_cost (data
, use
, cand
);
5763 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5766 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5769 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5772 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5773 cost
= iv_ca_cost (ivs
);
5775 *n_ivs
= iv_ca_n_cands (ivs
);
5776 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5781 /* Try narrowing set IVS by removing CAND. Return the cost of
5782 the new set and store the differences in DELTA. START is
5783 the candidate with which we start narrowing. */
5786 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5787 struct iv_cand
*cand
, struct iv_cand
*start
,
5788 struct iv_ca_delta
**delta
)
5792 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5794 struct iv_cand
*cnd
;
5795 comp_cost cost
, best_cost
, acost
;
5798 for (i
= 0; i
< n_iv_uses (data
); i
++)
5800 use
= iv_use (data
, i
);
5802 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5803 if (old_cp
->cand
!= cand
)
5806 best_cost
= iv_ca_cost (ivs
);
5807 /* Start narrowing with START. */
5808 new_cp
= get_use_iv_cost (data
, use
, start
);
5810 if (data
->consider_all_candidates
)
5812 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5814 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5817 cnd
= iv_cand (data
, ci
);
5819 cp
= get_use_iv_cost (data
, use
, cnd
);
5823 iv_ca_set_cp (data
, ivs
, use
, cp
);
5824 acost
= iv_ca_cost (ivs
);
5826 if (compare_costs (acost
, best_cost
) < 0)
5835 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5837 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5840 cnd
= iv_cand (data
, ci
);
5842 cp
= get_use_iv_cost (data
, use
, cnd
);
5846 iv_ca_set_cp (data
, ivs
, use
, cp
);
5847 acost
= iv_ca_cost (ivs
);
5849 if (compare_costs (acost
, best_cost
) < 0)
5856 /* Restore to old cp for use. */
5857 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5861 iv_ca_delta_free (delta
);
5862 return infinite_cost
;
5865 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5868 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5869 cost
= iv_ca_cost (ivs
);
5870 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5875 /* Try optimizing the set of candidates IVS by removing candidates different
5876 from EXCEPT_CAND from it. Return cost of the new set, and store
5877 differences in DELTA. */
5880 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5881 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5884 struct iv_ca_delta
*act_delta
, *best_delta
;
5886 comp_cost best_cost
, acost
;
5887 struct iv_cand
*cand
;
5890 best_cost
= iv_ca_cost (ivs
);
5892 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5894 cand
= iv_cand (data
, i
);
5896 if (cand
== except_cand
)
5899 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5901 if (compare_costs (acost
, best_cost
) < 0)
5904 iv_ca_delta_free (&best_delta
);
5905 best_delta
= act_delta
;
5908 iv_ca_delta_free (&act_delta
);
5917 /* Recurse to possibly remove other unnecessary ivs. */
5918 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5919 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5920 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5921 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5925 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5926 cheaper local cost for USE than BEST_CP. Return pointer to
5927 the corresponding cost_pair, otherwise just return BEST_CP. */
5929 static struct cost_pair
*
5930 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
5931 unsigned int cand_idx
, struct iv_cand
*old_cand
,
5932 struct cost_pair
*best_cp
)
5934 struct iv_cand
*cand
;
5935 struct cost_pair
*cp
;
5937 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
5938 if (cand_idx
== old_cand
->id
)
5941 cand
= iv_cand (data
, cand_idx
);
5942 cp
= get_use_iv_cost (data
, use
, cand
);
5943 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
5949 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5950 which are used by more than one iv uses. For each of those candidates,
5951 this function tries to represent iv uses under that candidate using
5952 other ones with lower local cost, then tries to prune the new set.
5953 If the new set has lower cost, it returns the new cost after recording
5954 candidate replacement in list DELTA. */
5957 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5958 struct iv_ca_delta
**delta
)
5960 bitmap_iterator bi
, bj
;
5961 unsigned int i
, j
, k
;
5963 struct iv_cand
*cand
;
5964 comp_cost orig_cost
, acost
;
5965 struct iv_ca_delta
*act_delta
, *tmp_delta
;
5966 struct cost_pair
*old_cp
, *best_cp
= NULL
;
5969 orig_cost
= iv_ca_cost (ivs
);
5971 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5973 if (ivs
->n_cand_uses
[i
] == 1
5974 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
5977 cand
= iv_cand (data
, i
);
5980 /* Represent uses under current candidate using other ones with
5981 lower local cost. */
5982 for (j
= 0; j
< ivs
->upto
; j
++)
5984 use
= iv_use (data
, j
);
5985 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5987 if (old_cp
->cand
!= cand
)
5991 if (data
->consider_all_candidates
)
5992 for (k
= 0; k
< n_iv_cands (data
); k
++)
5993 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5994 old_cp
->cand
, best_cp
);
5996 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
5997 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5998 old_cp
->cand
, best_cp
);
6000 if (best_cp
== old_cp
)
6003 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6005 /* No need for further prune. */
6009 /* Prune the new candidate set. */
6010 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6011 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6012 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6013 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6015 if (compare_costs (acost
, orig_cost
) < 0)
6021 iv_ca_delta_free (&act_delta
);
6027 /* Tries to extend the set IVS in the best possible way in order
6028 to express the USE. If ORIGINALP is true, prefer candidates from
6029 the original set of IVs, otherwise favor important candidates not
6030 based on any memory object. */
6033 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6034 struct iv_use
*use
, bool originalp
)
6036 comp_cost best_cost
, act_cost
;
6039 struct iv_cand
*cand
;
6040 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6041 struct cost_pair
*cp
;
6043 iv_ca_add_use (data
, ivs
, use
);
6044 best_cost
= iv_ca_cost (ivs
);
6045 cp
= iv_ca_cand_for_use (ivs
, use
);
6048 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6049 iv_ca_set_no_cp (data
, ivs
, use
);
6052 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6053 first try important candidates not based on any memory object. Only if
6054 this fails, try the specific ones. Rationale -- in loops with many
6055 variables the best choice often is to use just one generic biv. If we
6056 added here many ivs specific to the uses, the optimization algorithm later
6057 would be likely to get stuck in a local minimum, thus causing us to create
6058 too many ivs. The approach from few ivs to more seems more likely to be
6059 successful -- starting from few ivs, replacing an expensive use by a
6060 specific iv should always be a win. */
6061 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6063 cand
= iv_cand (data
, i
);
6065 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6068 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6071 if (iv_ca_cand_used_p (ivs
, cand
))
6074 cp
= get_use_iv_cost (data
, use
, cand
);
6078 iv_ca_set_cp (data
, ivs
, use
, cp
);
6079 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6081 iv_ca_set_no_cp (data
, ivs
, use
);
6082 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6084 if (compare_costs (act_cost
, best_cost
) < 0)
6086 best_cost
= act_cost
;
6088 iv_ca_delta_free (&best_delta
);
6089 best_delta
= act_delta
;
6092 iv_ca_delta_free (&act_delta
);
6095 if (infinite_cost_p (best_cost
))
6097 for (i
= 0; i
< use
->n_map_members
; i
++)
6099 cp
= use
->cost_map
+ i
;
6104 /* Already tried this. */
6105 if (cand
->important
)
6107 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6109 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6113 if (iv_ca_cand_used_p (ivs
, cand
))
6117 iv_ca_set_cp (data
, ivs
, use
, cp
);
6118 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6119 iv_ca_set_no_cp (data
, ivs
, use
);
6120 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6123 if (compare_costs (act_cost
, best_cost
) < 0)
6125 best_cost
= act_cost
;
6128 iv_ca_delta_free (&best_delta
);
6129 best_delta
= act_delta
;
6132 iv_ca_delta_free (&act_delta
);
6136 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6137 iv_ca_delta_free (&best_delta
);
6139 return !infinite_cost_p (best_cost
);
6142 /* Finds an initial assignment of candidates to uses. */
6144 static struct iv_ca
*
6145 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6147 struct iv_ca
*ivs
= iv_ca_new (data
);
6150 for (i
= 0; i
< n_iv_uses (data
); i
++)
6151 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6160 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6161 points to a bool variable; if it is true, this function tries to break
6162 a local optimal fixed-point by replacing candidates in IVS. */
6165 try_improve_iv_set (struct ivopts_data
*data
,
6166 struct iv_ca
*ivs
, bool *try_replace_p
)
6169 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6170 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6171 struct iv_cand
*cand
;
6173 /* Try extending the set of induction variables by one. */
6174 for (i
= 0; i
< n_iv_cands (data
); i
++)
6176 cand
= iv_cand (data
, i
);
6178 if (iv_ca_cand_used_p (ivs
, cand
))
6181 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6185 /* If we successfully added the candidate and the set is small enough,
6186 try optimizing it by removing other candidates. */
6187 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6189 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6190 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6191 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6192 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6195 if (compare_costs (acost
, best_cost
) < 0)
6198 iv_ca_delta_free (&best_delta
);
6199 best_delta
= act_delta
;
6202 iv_ca_delta_free (&act_delta
);
6207 /* Try removing the candidates from the set instead. */
6208 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6210 if (!best_delta
&& *try_replace_p
)
6212 *try_replace_p
= false;
6213 /* So far candidate selecting algorithm tends to choose fewer IVs
6214 so that it can handle cases in which loops have many variables
6215 but the best choice is often to use only one general biv. One
6216 weakness is it can't handle opposite cases, in which different
6217 candidates should be chosen with respect to each use. To solve
6218 the problem, we replace candidates in a manner described by the
6219 comments of iv_ca_replace, thus give general algorithm a chance
6220 to break local optimal fixed-point in these cases. */
6221 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6228 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6229 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6230 iv_ca_delta_free (&best_delta
);
6234 /* Attempts to find the optimal set of induction variables. We do simple
6235 greedy heuristic -- we try to replace at most one candidate in the selected
6236 solution and remove the unused ivs while this improves the cost. */
6238 static struct iv_ca
*
6239 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6242 bool try_replace_p
= true;
6244 /* Get the initial solution. */
6245 set
= get_initial_solution (data
, originalp
);
6248 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6249 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6255 fprintf (dump_file
, "Initial set of candidates:\n");
6256 iv_ca_dump (data
, dump_file
, set
);
6259 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6261 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6263 fprintf (dump_file
, "Improved to:\n");
6264 iv_ca_dump (data
, dump_file
, set
);
6271 static struct iv_ca
*
6272 find_optimal_iv_set (struct ivopts_data
*data
)
6275 struct iv_ca
*set
, *origset
;
6277 comp_cost cost
, origcost
;
6279 /* Determine the cost based on a strategy that starts with original IVs,
6280 and try again using a strategy that prefers candidates not based
6281 on any IVs. */
6282 origset
= find_optimal_iv_set_1 (data
, true);
6283 set
= find_optimal_iv_set_1 (data
, false);
6285 if (!origset
&& !set
)
6288 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6289 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6291 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6293 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6294 origcost
.cost
, origcost
.complexity
);
6295 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6296 cost
.cost
, cost
.complexity
);
6299 /* Choose the one with the best cost. */
6300 if (compare_costs (origcost
, cost
) <= 0)
6307 iv_ca_free (&origset
);
6309 for (i
= 0; i
< n_iv_uses (data
); i
++)
6311 use
= iv_use (data
, i
);
6312 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6318 /* Creates a new induction variable corresponding to CAND. */
6321 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6323 gimple_stmt_iterator incr_pos
;
6333 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6337 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6345 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6349 /* Mark that the iv is preserved. */
6350 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6351 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6353 /* Rewrite the increment so that it uses var_before directly. */
6354 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6358 gimple_add_tmp_var (cand
->var_before
);
6360 base
= unshare_expr (cand
->iv
->base
);
6362 create_iv (base
, unshare_expr (cand
->iv
->step
),
6363 cand
->var_before
, data
->current_loop
,
6364 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6367 /* Creates new induction variables described in SET. */
6370 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6373 struct iv_cand
*cand
;
6376 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6378 cand
= iv_cand (data
, i
);
6379 create_new_iv (data
, cand
);
6382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6384 fprintf (dump_file
, "Selected IV set for loop %d",
6385 data
->current_loop
->num
);
6386 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6387 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6388 LOCATION_LINE (data
->loop_loc
));
6389 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6390 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6392 cand
= iv_cand (data
, i
);
6393 dump_cand (dump_file
, cand
);
6395 fprintf (dump_file
, "\n");
6399 /* Rewrites USE (definition of iv used in a nonlinear expression)
6400 using candidate CAND. */
6403 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6404 struct iv_use
*use
, struct iv_cand
*cand
)
6409 gimple_stmt_iterator bsi
;
6411 /* An important special case -- if we are asked to express value of
6412 the original iv by itself, just exit; there is no need to
6413 introduce a new computation (that might also need casting the
6414 variable to unsigned and back). */
6415 if (cand
->pos
== IP_ORIGINAL
6416 && cand
->incremented_at
== use
->stmt
)
6418 enum tree_code stmt_code
;
6420 gcc_assert (is_gimple_assign (use
->stmt
));
6421 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6423 /* Check whether we may leave the computation unchanged.
6424 This is the case only if it does not rely on other
6425 computations in the loop -- otherwise, the computation
6426 we rely upon may be removed in remove_unused_ivs,
6427 thus leading to ICE. */
6428 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6429 if (stmt_code
== PLUS_EXPR
6430 || stmt_code
== MINUS_EXPR
6431 || stmt_code
== POINTER_PLUS_EXPR
)
6433 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6434 op
= gimple_assign_rhs2 (use
->stmt
);
6435 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6436 op
= gimple_assign_rhs1 (use
->stmt
);
6443 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6447 comp
= get_computation (data
->current_loop
, use
, cand
);
6448 gcc_assert (comp
!= NULL_TREE
);
6450 switch (gimple_code (use
->stmt
))
6453 tgt
= PHI_RESULT (use
->stmt
);
6455 /* If we should keep the biv, do not replace it. */
6456 if (name_info (data
, tgt
)->preserve_biv
)
6459 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6463 tgt
= gimple_assign_lhs (use
->stmt
);
6464 bsi
= gsi_for_stmt (use
->stmt
);
6471 if (!valid_gimple_rhs_p (comp
)
6472 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6473 /* We can't allow re-allocating the stmt as it might be pointed
6474 to still. */
6475 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6476 >= gimple_num_ops (gsi_stmt (bsi
)))))
6478 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6479 true, GSI_SAME_STMT
);
6480 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6482 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6483 /* As this isn't a plain copy we have to reset alignment
6484 information. */
6485 if (SSA_NAME_PTR_INFO (comp
))
6486 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6490 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6492 ass
= gimple_build_assign (tgt
, comp
);
6493 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6495 bsi
= gsi_for_stmt (use
->stmt
);
6496 remove_phi_node (&bsi
, false);
6500 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6501 use
->stmt
= gsi_stmt (bsi
);
6505 /* Performs a peephole optimization to reorder the iv update statement with
6506 a mem ref to enable instruction combining in later phases. The mem ref uses
6507 the iv value before the update, so the reordering transformation requires
6508 adjustment of the offset. CAND is the selected IV_CAND.
6510 Example:
6512 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6513 iv2 = iv1 + 1;
6515 if (t < val) (1)
6516 goto L;
6517 goto Head;
6520 Directly propagating t over to (1) will introduce overlapping live ranges
6521 and thus increase register pressure. This peephole transforms it into:
6524 iv2 = iv1 + 1;
6525 t = MEM_REF (base, iv2, 8, 8);
6526 if (t < val)
6527 goto L;
6528 goto Head;
6529 */
6532 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6535 gimple iv_update
, stmt
;
6537 gimple_stmt_iterator gsi
, gsi_iv
;
6539 if (cand
->pos
!= IP_NORMAL
)
6542 var_after
= cand
->var_after
;
6543 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6545 bb
= gimple_bb (iv_update
);
6546 gsi
= gsi_last_nondebug_bb (bb
);
6547 stmt
= gsi_stmt (gsi
);
6549 /* Only handle conditional statement for now. */
6550 if (gimple_code (stmt
) != GIMPLE_COND
)
6553 gsi_prev_nondebug (&gsi
);
6554 stmt
= gsi_stmt (gsi
);
6555 if (stmt
!= iv_update
)
6558 gsi_prev_nondebug (&gsi
);
6559 if (gsi_end_p (gsi
))
6562 stmt
= gsi_stmt (gsi
);
6563 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6566 if (stmt
!= use
->stmt
)
6569 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6572 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6574 fprintf (dump_file
, "Reordering \n");
6575 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6576 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6577 fprintf (dump_file
, "\n");
6580 gsi
= gsi_for_stmt (use
->stmt
);
6581 gsi_iv
= gsi_for_stmt (iv_update
);
6582 gsi_move_before (&gsi_iv
, &gsi
);
6584 cand
->pos
= IP_BEFORE_USE
;
6585 cand
->incremented_at
= use
->stmt
;
6588 /* Rewrites USE (address that is an iv) using candidate CAND. */
/* rewrite_use_address: express the address use USE through candidate CAND.

   DATA supplies the current loop and the speed flag; USE is the address use
   (its operand is reached through USE->op_p); CAND is the chosen candidate.

   Visible steps: adjust_iv_update_pos (CAND, USE) runs first; the use is then
   computed as an affine combination at USE->stmt, which is unshared; a base
   hint is taken from the candidate only when CAND->iv->base_object is set; a
   memory reference is built by create_mem_ref, using an iterator positioned
   at USE->stmt; and reference info is copied from the original operand to
   the new reference.

   NOTE(review): the declarations of aff, ok, iv and ref, whatever consumes
   OK, and the statement that stores REF back are not present in this
   extract.  */
6591 rewrite_use_address (struct ivopts_data
*data
,
6592 struct iv_use
*use
, struct iv_cand
*cand
)
6595 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6596 tree base_hint
= NULL_TREE
;
/* The iv update may be moved before the use here, ahead of computing the
   use at USE->stmt.  */
6600 adjust_iv_update_pos (cand
, use
);
6601 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6603 unshare_aff_combination (&aff
);
6605 /* To avoid undefined overflow problems, all IV candidates use unsigned
6606 integer types. The drawback is that this makes it impossible for
6607 create_mem_ref to distinguish an IV that is based on a memory object
6608 from one that represents simply an offset.
6610 To work around this problem, we pass a hint to create_mem_ref that
6611 indicates which variable (if any) in aff is an IV based on a memory
6612 object. Note that we only consider the candidate. If this is not
6613 based on an object, the base of the reference is in some subexpression
6614 of the use -- but these will use pointer types, so they are recognized
6615 by the create_mem_ref heuristics anyway. */
6616 if (cand
->iv
->base_object
)
6617 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
/* Build the new reference with the type and alias pointer type of the
   original operand.  */
6619 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6620 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6621 reference_alias_ptr_type (*use
->op_p
),
6622 iv
, base_hint
, data
->speed
);
6623 copy_ref_info (ref
, *use
->op_p
);
6627 /* Rewrites USE (the condition such that one of the arguments is an iv) using
/* rewrite_use_compare: rewrite the condition USE in terms of candidate CAND.

   DATA supplies the current loop; USE->stmt is the condition; CAND is the
   chosen candidate.  The cost pair for (USE, CAND) is fetched with
   get_use_iv_cost.

   Two paths are visible.  In the first, the candidate's variable at USE->stmt
   becomes the lhs of the condition, COMPARE becomes its code, and BOUND
   (converted to the variable's type, unshared and gimplified) becomes its
   rhs; an insertion on the loop preheader edge is also made.  In the second,
   introduced by the "elimination failed" comment, the original iv operand is
   recomputed from CAND with get_computation and gimplified before the
   condition.

   NOTE(review): the assignments of BOUND and COMPARE, the test choosing
   between the two paths, the declarations of stmts and ok, and the argument
   completing the preheader-edge insertion are not present in this extract;
   presumably BOUND and COMPARE come from the cost pair -- confirm.  */
6631 rewrite_use_compare (struct ivopts_data
*data
,
6632 struct iv_use
*use
, struct iv_cand
*cand
)
6634 tree comp
, *var_p
, op
, bound
;
6635 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6636 enum tree_code compare
;
6637 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6643 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6644 tree var_type
= TREE_TYPE (var
);
6647 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6649 fprintf (dump_file
, "Replacing exit test: ");
6650 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
/* Convert the bound to the candidate variable's type and gimplify it.  */
6653 bound
= unshare_expr (fold_convert (var_type
, bound
));
6654 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6656 gsi_insert_seq_on_edge_immediate (
6657 loop_preheader_edge (data
->current_loop
),
/* Rewrite the condition in place as VAR COMPARE OP.  */
6660 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6661 gimple_cond_set_lhs (cond_stmt
, var
);
6662 gimple_cond_set_code (cond_stmt
, compare
);
6663 gimple_cond_set_rhs (cond_stmt
, op
);
6667 /* The induction variable elimination failed; just express the original
6669 comp
= get_computation (data
->current_loop
, use
, cand
);
6670 gcc_assert (comp
!= NULL_TREE
);
6672 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6675 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6676 true, GSI_SAME_STMT
);
6679 /* Rewrites USE using candidate CAND. */
/* rewrite_use: dispatch the rewriting of USE with candidate CAND to
   rewrite_use_nonlinear_expr, rewrite_use_address or rewrite_use_compare,
   then call update_stmt on USE->stmt.

   DATA, USE and CAND are passed through unchanged to the chosen helper.

   NOTE(review): only the USE_NONLINEAR_EXPR label is visible; the switch
   header and the labels for the address and compare helpers are not present
   in this extract.  */
6682 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6686 case USE_NONLINEAR_EXPR
:
6687 rewrite_use_nonlinear_expr (data
, use
, cand
);
6691 rewrite_use_address (data
, use
, cand
);
6695 rewrite_use_compare (data
, use
, cand
);
/* Runs after the dispatch, on the statement of the rewritten use.  */
6702 update_stmt (use
->stmt
);
6705 /* Rewrite the uses using the selected induction variables. */
/* rewrite_uses: walk all uses recorded in DATA, from index 0 up to
   n_iv_uses (DATA), and rewrite each one with the candidate stored in its
   `selected' field by calling rewrite_use.

   NOTE(review): the declarations of i and use, and anything between fetching
   the candidate and the rewrite_use call, are not present in this
   extract.  */
6708 rewrite_uses (struct ivopts_data
*data
)
6711 struct iv_cand
*cand
;
6714 for (i
= 0; i
< n_iv_uses (data
); i
++)
6716 use
= iv_use (data
, i
);
6717 cand
= use
->selected
;
6720 rewrite_use (data
, use
, cand
);
6724 /* Removes the ivs that are not used after rewriting. */
/* remove_unused_ivs: collect the SSA names of ivs that are no longer needed
   and release their definitions.

   DATA is the pass state.  The function walks the bits set in DATA->relevant,
   looks up the version info of each, and for those passing the (partly
   visible) filter -- non-zero step, no have_use_for, no preserve_biv -- sets
   the SSA version of the iv's name in the TOREMOVE bitmap.  At the end,
   release_defs_bitset (TOREMOVE) is called and the bitmap is freed.

   When MAY_HAVE_DEBUG_STMTS holds and the name has a defining statement, the
   debug binds using the name are examined first.  A replacement expression
   COMP is computed with get_computation_at from the best-scoring candidate
   among those selected for the first (at most 64) uses.  A DEBUG_EXPR_DECL
   temporary bound to COMP is inserted at the definition (after the labels of
   the block when the definition is a PHI), and uses of the name in debug
   binds are replaced via SET_USE.

   NOTE(review): many lines of this function are missing from the extract,
   including the start of the filter condition, the counting of debug uses,
   the conditions selecting between direct substitution and the debug
   temporary, and the declaration of def_temp.  The description above covers
   only what is visible.  */
6727 remove_unused_ivs (struct ivopts_data
*data
)
6731 bitmap toremove
= BITMAP_ALLOC (NULL
);
6733 /* Figure out an order in which to release SSA DEFs so that we don't
6734 release something that we'd have to propagate into a debug stmt
6736 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6738 struct version_info
*info
;
6740 info
= ver_info (data
, j
);
6742 && !integer_zerop (info
->iv
->step
)
6744 && !info
->iv
->have_use_for
6745 && !info
->preserve_biv
)
6747 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6749 tree def
= info
->iv
->ssa_name
;
6751 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6753 imm_use_iterator imm_iter
;
6754 use_operand_p use_p
;
6758 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6760 if (!gimple_debug_bind_p (stmt
))
6763 /* We just want to determine whether to do nothing
6764 (count == 0), to substitute the computed
6765 expression into a single use of the SSA DEF by
6766 itself (count == 1), or to use a debug temp
6767 because the SSA DEF is used multiple times or as
6768 part of a larger expression (count > 1). */
6770 if (gimple_debug_bind_get_value (stmt
) != def
)
6774 BREAK_FROM_IMM_USE_STMT (imm_iter
);
/* A zeroed dummy use wrapping this iv is what the replacement expression is
   later computed for.  The loop scores the candidates selected for the first
   (at most 64) uses and tracks the best score seen.  */
6780 struct iv_use dummy_use
;
6781 struct iv_cand
*best_cand
= NULL
, *cand
;
6782 unsigned i
, best_pref
= 0, cand_pref
;
6784 memset (&dummy_use
, 0, sizeof (dummy_use
));
6785 dummy_use
.iv
= info
->iv
;
6786 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6788 cand
= iv_use (data
, i
)->selected
;
6789 if (cand
== best_cand
)
6791 cand_pref
= operand_equal_p (cand
->iv
->step
,
6795 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6796 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6799 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6801 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6804 best_pref
= cand_pref
;
/* Express the iv being removed through the best candidate, at the point of
   its definition.  */
6811 tree comp
= get_computation_at (data
->current_loop
,
6812 &dummy_use
, best_cand
,
6813 SSA_NAME_DEF_STMT (def
));
/* Build an artificial debug temporary typed like COMP, bind it to COMP, and
   insert the bind at DEF's definition.  */
6819 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6820 DECL_ARTIFICIAL (vexpr
) = 1;
6821 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6822 if (SSA_NAME_VAR (def
))
6823 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6825 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6827 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
6828 gimple_stmt_iterator gsi
;
6830 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6831 gsi
= gsi_after_labels (gimple_bb
6832 (SSA_NAME_DEF_STMT (def
)));
6834 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6836 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
/* Replace DEF with COMP in the operands of the statements handled here; the
   action taken for statements that are not debug binds is not visible in
   this extract.  */
6840 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6842 if (!gimple_debug_bind_p (stmt
))
6845 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6846 SET_USE (use_p
, comp
);
/* Release the definitions of everything collected, then free the bitmap.  */
6854 release_defs_bitset (toremove
);
6856 BITMAP_FREE (toremove
);
6859 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6860 for hash_map::traverse. */
/* free_tree_niter_desc: traversal callback taking an edge key, a
   tree_niter_desc pointer VALUE and a void * argument.  The first and third
   parameters are unnamed, so only VALUE can be referenced by the body.

   NOTE(review): the body and the return type are not present in this
   extract.  */
6863 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
6869 /* Frees data allocated by the optimization of a single loop. */
/* free_loop_data: reset the per-loop parts of DATA so that the next loop can
   be processed.

   Visible effects, in order:
   - DATA->niters is traversed with free_tree_niter_desc, deleted and set to
     NULL;
   - for every bit set in DATA->relevant, the has_nonlin_use and preserve_biv
     flags of the version info are cleared;
   - the relevant and important_candidates bitmaps are cleared;
   - for every use, related_cands, each non-null cost_map[j].depends_on and
     the cost_map array itself are freed; the use vector is truncated to 0;
   - for every candidate, a non-null depends_on bitmap is freed; the candidate
     vector is truncated to 0;
   - if version_info_size is smaller than num_ssa_names, the version_info
     array is freed and reallocated with XCNEWVEC at twice num_ssa_names;
   - max_inv_id is reset to 0, DECL_RTL is reset to NULL_RTX for every object
     in decl_rtl_to_reset and that vector is truncated;
   - the invariant-expression table is emptied and inv_expr_id is reset to 0.

   NOTE(review): the declarations of the loop variables, any guard around the
   niters cleanup, further per-version resets and the freeing of the use and
   candidate objects themselves are not present in this extract.  */
6872 free_loop_data (struct ivopts_data
*data
)
/* Release the cached niter descriptors and the map holding them.  */
6880 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
6881 delete data
->niters
;
6882 data
->niters
= NULL
;
/* Clear the per-loop flags of every relevant SSA version.  */
6885 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6887 struct version_info
*info
;
6889 info
= ver_info (data
, i
);
6892 info
->has_nonlin_use
= false;
6893 info
->preserve_biv
= false;
6896 bitmap_clear (data
->relevant
);
6897 bitmap_clear (data
->important_candidates
);
/* Free the bitmaps and cost maps owned by each use.  */
6899 for (i
= 0; i
< n_iv_uses (data
); i
++)
6901 struct iv_use
*use
= iv_use (data
, i
);
6904 BITMAP_FREE (use
->related_cands
);
6905 for (j
= 0; j
< use
->n_map_members
; j
++)
6906 if (use
->cost_map
[j
].depends_on
)
6907 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6908 free (use
->cost_map
);
6911 data
->iv_uses
.truncate (0);
/* Free the dependency bitmap of each candidate.  */
6913 for (i
= 0; i
< n_iv_cands (data
); i
++)
6915 struct iv_cand
*cand
= iv_cand (data
, i
);
6918 if (cand
->depends_on
)
6919 BITMAP_FREE (cand
->depends_on
);
6922 data
->iv_candidates
.truncate (0);
/* Grow the version_info array when the number of SSA names has outgrown it;
   the old contents are discarded, not copied.  */
6924 if (data
->version_info_size
< num_ssa_names
)
6926 data
->version_info_size
= 2 * num_ssa_names
;
6927 free (data
->version_info
);
6928 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6931 data
->max_inv_id
= 0;
/* Reset DECL_RTL on every object recorded in decl_rtl_to_reset.  */
6933 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6934 SET_DECL_RTL (obj
, NULL_RTX
);
6936 decl_rtl_to_reset
.truncate (0);
6938 data
->inv_expr_tab
->empty ();
6939 data
->inv_expr_id
= 0;
6942 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
/* tree_ssa_iv_optimize_finalize: tear down the pass-wide state held in DATA.

   It first calls free_loop_data (DATA) for the per-loop state.  It then frees
   the version_info array and the relevant and important_candidates bitmaps,
   and releases the decl_rtl_to_reset, iv_uses and iv_candidates vectors.
   Finally it deletes the invariant-expression table (leaving the pointer
   NULL) and frees the name expansion cache.  */
6946 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6948 free_loop_data (data
);
6949 free (data
->version_info
);
6950 BITMAP_FREE (data
->relevant
);
6951 BITMAP_FREE (data
->important_candidates
);
6953 decl_rtl_to_reset
.release ();
6954 data
->iv_uses
.release ();
6955 data
->iv_candidates
.release ();
6956 delete data
->inv_expr_tab
;
6957 data
->inv_expr_tab
= NULL
;
6958 free_affine_expand_cache (&data
->name_expansion_cache
);
6961 /* Returns true if the loop body BODY includes any function calls. */
/* loop_body_includes_call: scan the NUM_NODES basic blocks in BODY, statement
   by statement, testing each for a call statement whose callee is not an
   inexpensive builtin (per is_inexpensive_builtin on gimple_call_fndecl).

   BODY is the array of blocks; NUM_NODES is its length.

   NOTE(review): the return statements and the declaration of i are not
   present in this extract; presumably the function returns true on the first
   matching statement and false otherwise -- confirm.  */
6964 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6966 gimple_stmt_iterator gsi
;
6969 for (i
= 0; i
< num_nodes
; i
++)
6970 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6972 gimple stmt
= gsi_stmt (gsi
);
/* Calls to inexpensive builtins do not satisfy this test.  */
6973 if (is_gimple_call (stmt
)
6974 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6980 /* Optimizes the LOOP. Returns true if anything changed. */
/* tree_ssa_iv_optimize_loop: run the induction variable optimization on LOOP
   using the pass state in DATA.

   Visible sequence:
   - assert that DATA->niters is unset, then record the loop, its location and
     whether it is optimized for speed;
   - with detailed dumps, print the loop number, its source location when
     known, and the single exit with its condition;
   - fetch the loop body, record whether it contains a call, and renumber the
     statement uids in its blocks;
   - record whether EXIT is non-null and the only exit of the loop;
   - find induction variables, find interesting uses (compared against
     MAX_CONSIDERED_USES), find candidates, compute the costs, choose the
     optimal set, create the new ivs, rewrite the uses and remove the unused
     ivs;
   - call free_loop_data (DATA).

   CHANGED is initialized to false.

   NOTE(review): the statements taken when find_induction_variables fails or
   the use limit is exceeded, the point where CHANGED is set, the freeing of
   BODY, the return statement and the declaration of body are not present in
   this extract.  */
6983 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6985 bool changed
= false;
6986 struct iv_ca
*iv_ca
;
6987 edge exit
= single_dom_exit (loop
);
6990 gcc_assert (!data
->niters
);
6991 data
->current_loop
= loop
;
6992 data
->loop_loc
= find_loop_location (loop
);
6993 data
->speed
= optimize_loop_for_speed_p (loop
);
6995 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6997 fprintf (dump_file
, "Processing loop %d", loop
->num
);
6998 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6999 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7000 LOCATION_LINE (data
->loop_loc
));
7001 fprintf (dump_file
, "\n");
7005 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7006 exit
->src
->index
, exit
->dest
->index
);
7007 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7008 fprintf (dump_file
, "\n");
7011 fprintf (dump_file
, "\n");
7014 body
= get_loop_body (loop
);
7015 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7016 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7019 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7021 /* For each ssa name determines whether it behaves as an induction variable
7023 if (!find_induction_variables (data
))
7026 /* Finds interesting uses (item 1). */
7027 find_interesting_uses (data
);
7028 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7031 /* Finds candidates for the induction variables (item 2). */
7032 find_iv_candidates (data
);
7034 /* Calculates the costs (item 3, part 1). */
7035 determine_iv_costs (data
);
7036 determine_use_iv_costs (data
);
7037 determine_set_costs (data
);
7039 /* Find the optimal set of induction variables (item 3, part 2). */
7040 iv_ca
= find_optimal_iv_set (data
);
7045 /* Create the new induction variables (item 4, part 1). */
7046 create_new_ivs (data
, iv_ca
);
7047 iv_ca_free (&iv_ca
);
7049 /* Rewrite the uses (item 4, part 2). */
7050 rewrite_uses (data
);
7052 /* Remove the ivs that are unused after rewriting. */
7053 remove_unused_ivs (data
);
7055 /* We have changed the structure of induction variables; it might happen
7056 that definitions in the scev database refer to some of them that were
7061 free_loop_data (data
);
7066 /* Main entry point. Optimizes induction variables in loops. */
/* tree_ssa_iv_optimize: driver of the pass.  It initializes a local
   ivopts_data with tree_ssa_iv_optimize_init, visits every loop from the
   innermost outwards and calls tree_ssa_iv_optimize_loop on each (dumping the
   loop first when detailed dumps are enabled), then calls
   tree_ssa_iv_optimize_finalize.  The result of tree_ssa_iv_optimize_loop is
   not used in the visible call.

   NOTE(review): the declaration of loop, the return type and any statement
   after the finalize call are not present in this extract.  */
7069 tree_ssa_iv_optimize (void)
7072 struct ivopts_data data
;
7074 tree_ssa_iv_optimize_init (&data
);
7076 /* Optimize the loops starting with the innermost ones. */
7077 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7079 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7080 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7082 tree_ssa_iv_optimize_loop (&data
, loop
);
7085 tree_ssa_iv_optimize_finalize (&data
);