1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
69 #include "stor-layout.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "pointer-set.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
78 #include "gimple-expr.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
114 cost of different addressing modes. This should be moved to a TBD
115 interface between the GIMPLE and RTL worlds. */
119 /* The infinite cost. */
120 #define INFTY 10000000
122 #define AVG_LOOP_NITER(LOOP) 5
124 /* Returns the expected number of loop iterations for LOOP.
125 The average trip count is computed from profile data if it
128 static inline HOST_WIDE_INT
129 avg_loop_niter (struct loop
*loop
)
131 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
133 return AVG_LOOP_NITER (loop
);
138 /* Representation of the induction variable. */
141 tree base
; /* Initial value of the iv. */
142 tree base_object
; /* A memory object to that the induction variable points. */
143 tree step
; /* Step of the iv (constant only). */
144 tree ssa_name
; /* The ssa name with the value. */
145 bool biv_p
; /* Is it a biv? */
146 bool have_use_for
; /* Do we already have a use for it? */
147 unsigned use_id
; /* The identifier in the use if it is the case. */
150 /* Per-ssa version information (induction variable descriptions, etc.). */
153 tree name
; /* The ssa name. */
154 struct iv
*iv
; /* Induction variable description. */
155 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
156 an expression that is not an induction variable. */
157 bool preserve_biv
; /* For the original biv, whether to preserve it. */
158 unsigned inv_id
; /* Id of an invariant. */
164 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
165 USE_ADDRESS
, /* Use in an address. */
166 USE_COMPARE
/* Use is a compare. */
169 /* Cost of a computation. */
172 int cost
; /* The runtime cost. */
173 unsigned complexity
; /* The estimate of the complexity of the code for
174 the computation (in no concrete units --
175 complexity field should be larger for more
176 complex expressions and addressing modes). */
179 static const comp_cost no_cost
= {0, 0};
180 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
182 /* The candidate - cost pair. */
185 struct iv_cand
*cand
; /* The candidate. */
186 comp_cost cost
; /* The cost. */
187 bitmap depends_on
; /* The list of invariants that have to be
189 tree value
; /* For final value elimination, the expression for
190 the final value of the iv. For iv elimination,
191 the new bound to compare with. */
192 enum tree_code comp
; /* For iv elimination, the comparison. */
193 int inv_expr_id
; /* Loop invariant expression id. */
199 unsigned id
; /* The id of the use. */
200 enum use_type type
; /* Type of the use. */
201 struct iv
*iv
; /* The induction variable it is based on. */
202 gimple stmt
; /* Statement in that it occurs. */
203 tree
*op_p
; /* The place where it occurs. */
204 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
207 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
208 struct cost_pair
*cost_map
;
209 /* The costs wrto the iv candidates. */
211 struct iv_cand
*selected
;
212 /* The selected candidate. */
215 /* The position where the iv is computed. */
218 IP_NORMAL
, /* At the end, just before the exit condition. */
219 IP_END
, /* At the end of the latch block. */
220 IP_BEFORE_USE
, /* Immediately before a specific use. */
221 IP_AFTER_USE
, /* Immediately after a specific use. */
222 IP_ORIGINAL
/* The original biv. */
225 /* The induction variable candidate. */
228 unsigned id
; /* The number of the candidate. */
229 bool important
; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
232 gimple incremented_at
;/* For original biv, the statement where it is
234 tree var_before
; /* The variable used for it before increment. */
235 tree var_after
; /* The variable used for it after increment. */
236 struct iv
*iv
; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost
; /* Cost of the candidate. */
241 unsigned cost_step
; /* Cost of the candidate's increment operation. */
242 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on
; /* The list of invariants that are used in step of the
248 /* Loop invariant expression hashtable entry. */
249 struct iv_inv_expr_ent
256 /* The data used by the induction variable optimizations. */
258 typedef struct iv_use
*iv_use_p
;
260 typedef struct iv_cand
*iv_cand_p
;
262 /* Hashtable helpers. */
264 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
266 typedef iv_inv_expr_ent value_type
;
267 typedef iv_inv_expr_ent compare_type
;
268 static inline hashval_t
hash (const value_type
*);
269 static inline bool equal (const value_type
*, const compare_type
*);
272 /* Hash function for loop invariant expressions. */
275 iv_inv_expr_hasher::hash (const value_type
*expr
)
280 /* Hash table equality function for expressions. */
283 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
285 return expr1
->hash
== expr2
->hash
286 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
291 /* The currently optimized loop. */
292 struct loop
*current_loop
;
294 /* Numbers of iterations for all exits of the current loop. */
295 struct pointer_map_t
*niters
;
297 /* Number of registers used in it. */
300 /* The size of version_info array allocated. */
301 unsigned version_info_size
;
303 /* The array of information for the ssa names. */
304 struct version_info
*version_info
;
306 /* The hashtable of loop invariant expressions created
308 hash_table
<iv_inv_expr_hasher
> inv_expr_tab
;
310 /* Loop invariant expression id. */
313 /* The bitmap of indices in version_info whose value was changed. */
316 /* The uses of induction variables. */
317 vec
<iv_use_p
> iv_uses
;
319 /* The candidates. */
320 vec
<iv_cand_p
> iv_candidates
;
322 /* A bitmap of important candidates. */
323 bitmap important_candidates
;
325 /* The maximum invariant id. */
328 /* Whether to consider just related and important candidates when replacing a
330 bool consider_all_candidates
;
332 /* Are we optimizing for speed? */
335 /* Whether the loop body includes any function calls. */
336 bool body_includes_call
;
338 /* Whether the loop body can only be exited via single exit. */
339 bool loop_single_exit_p
;
342 /* An assignment of iv candidates to uses. */
346 /* The number of uses covered by the assignment. */
349 /* Number of uses that cannot be expressed by the candidates in the set. */
352 /* Candidate assigned to a use, together with the related costs. */
353 struct cost_pair
**cand_for_use
;
355 /* Number of times each candidate is used. */
356 unsigned *n_cand_uses
;
358 /* The candidates used. */
361 /* The number of candidates in the set. */
364 /* Total number of registers needed. */
367 /* Total cost of expressing uses. */
368 comp_cost cand_use_cost
;
370 /* Total cost of candidates. */
373 /* Number of times each invariant is used. */
374 unsigned *n_invariant_uses
;
376 /* The array holding the number of uses of each loop
377 invariant expressions created by ivopt. */
378 unsigned *used_inv_expr
;
380 /* The number of created loop invariants. */
381 unsigned num_used_inv_expr
;
383 /* Total cost of the assignment. */
387 /* Difference of two iv candidate assignments. */
394 /* An old assignment (for rollback purposes). */
395 struct cost_pair
*old_cp
;
397 /* A new assignment. */
398 struct cost_pair
*new_cp
;
400 /* Next change in the list. */
401 struct iv_ca_delta
*next_change
;
404 /* Bound on number of candidates below that all candidates are considered. */
406 #define CONSIDER_ALL_CANDIDATES_BOUND \
407 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
410 optimizing such a loop would help, and it would take ages). */
412 #define MAX_CONSIDERED_USES \
413 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
415 /* If there are at most this number of ivs in the set, try removing unnecessary
416 ivs from the set always. */
418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
419 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
421 /* The list of trees for that the decl_rtl field must be reset is stored
424 static vec
<tree
> decl_rtl_to_reset
;
426 static comp_cost
force_expr_to_var_cost (tree
, bool);
428 /* Number of uses recorded in DATA. */
430 static inline unsigned
431 n_iv_uses (struct ivopts_data
*data
)
433 return data
->iv_uses
.length ();
436 /* Ith use recorded in DATA. */
438 static inline struct iv_use
*
439 iv_use (struct ivopts_data
*data
, unsigned i
)
441 return data
->iv_uses
[i
];
444 /* Number of candidates recorded in DATA. */
446 static inline unsigned
447 n_iv_cands (struct ivopts_data
*data
)
449 return data
->iv_candidates
.length ();
452 /* Ith candidate recorded in DATA. */
454 static inline struct iv_cand
*
455 iv_cand (struct ivopts_data
*data
, unsigned i
)
457 return data
->iv_candidates
[i
];
460 /* The single loop exit if it dominates the latch, NULL otherwise. */
463 single_dom_exit (struct loop
*loop
)
465 edge exit
= single_exit (loop
);
470 if (!just_once_each_iteration_p (loop
, exit
->src
))
476 /* Dumps information about the induction variable IV to FILE. */
479 dump_iv (FILE *file
, struct iv
*iv
)
483 fprintf (file
, "ssa name ");
484 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
485 fprintf (file
, "\n");
488 fprintf (file
, " type ");
489 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
490 fprintf (file
, "\n");
494 fprintf (file
, " base ");
495 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
496 fprintf (file
, "\n");
498 fprintf (file
, " step ");
499 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
500 fprintf (file
, "\n");
504 fprintf (file
, " invariant ");
505 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
506 fprintf (file
, "\n");
511 fprintf (file
, " base object ");
512 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
513 fprintf (file
, "\n");
517 fprintf (file
, " is a biv\n");
520 /* Dumps information about the USE to FILE. */
523 dump_use (FILE *file
, struct iv_use
*use
)
525 fprintf (file
, "use %d\n", use
->id
);
529 case USE_NONLINEAR_EXPR
:
530 fprintf (file
, " generic\n");
534 fprintf (file
, " address\n");
538 fprintf (file
, " compare\n");
545 fprintf (file
, " in statement ");
546 print_gimple_stmt (file
, use
->stmt
, 0, 0);
547 fprintf (file
, "\n");
549 fprintf (file
, " at position ");
551 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
552 fprintf (file
, "\n");
554 dump_iv (file
, use
->iv
);
556 if (use
->related_cands
)
558 fprintf (file
, " related candidates ");
559 dump_bitmap (file
, use
->related_cands
);
563 /* Dumps information about the uses to FILE. */
566 dump_uses (FILE *file
, struct ivopts_data
*data
)
571 for (i
= 0; i
< n_iv_uses (data
); i
++)
573 use
= iv_use (data
, i
);
575 dump_use (file
, use
);
576 fprintf (file
, "\n");
580 /* Dumps information about induction variable candidate CAND to FILE. */
583 dump_cand (FILE *file
, struct iv_cand
*cand
)
585 struct iv
*iv
= cand
->iv
;
587 fprintf (file
, "candidate %d%s\n",
588 cand
->id
, cand
->important
? " (important)" : "");
590 if (cand
->depends_on
)
592 fprintf (file
, " depends on ");
593 dump_bitmap (file
, cand
->depends_on
);
598 fprintf (file
, " final value replacement\n");
602 if (cand
->var_before
)
604 fprintf (file
, " var_before ");
605 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
606 fprintf (file
, "\n");
610 fprintf (file
, " var_after ");
611 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
612 fprintf (file
, "\n");
618 fprintf (file
, " incremented before exit test\n");
622 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
626 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
630 fprintf (file
, " incremented at end\n");
634 fprintf (file
, " original biv\n");
641 /* Returns the info for ssa version VER. */
643 static inline struct version_info
*
644 ver_info (struct ivopts_data
*data
, unsigned ver
)
646 return data
->version_info
+ ver
;
649 /* Returns the info for ssa name NAME. */
651 static inline struct version_info
*
652 name_info (struct ivopts_data
*data
, tree name
)
654 return ver_info (data
, SSA_NAME_VERSION (name
));
657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
661 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
663 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
667 if (sbb
== loop
->latch
)
673 return stmt
== last_stmt (bb
);
676 /* Returns true if STMT if after the place where the original induction
677 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
678 if the positions are identical. */
681 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
683 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
684 basic_block stmt_bb
= gimple_bb (stmt
);
686 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
689 if (stmt_bb
!= cand_bb
)
693 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
695 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
698 /* Returns true if STMT if after the place where the induction variable
699 CAND is incremented in LOOP. */
702 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
710 return stmt_after_ip_normal_pos (loop
, stmt
);
714 return stmt_after_inc_pos (cand
, stmt
, false);
717 return stmt_after_inc_pos (cand
, stmt
, true);
724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
727 abnormal_ssa_name_p (tree exp
)
732 if (TREE_CODE (exp
) != SSA_NAME
)
735 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
739 abnormal phi node. Callback for for_each_index. */
742 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
743 void *data ATTRIBUTE_UNUSED
)
745 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
747 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
749 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
753 return !abnormal_ssa_name_p (*index
);
756 /* Returns true if EXPR contains a ssa name that occurs in an
757 abnormal phi node. */
760 contains_abnormal_ssa_name_p (tree expr
)
763 enum tree_code_class codeclass
;
768 code
= TREE_CODE (expr
);
769 codeclass
= TREE_CODE_CLASS (code
);
771 if (code
== SSA_NAME
)
772 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
774 if (code
== INTEGER_CST
775 || is_gimple_min_invariant (expr
))
778 if (code
== ADDR_EXPR
)
779 return !for_each_index (&TREE_OPERAND (expr
, 0),
780 idx_contains_abnormal_ssa_name_p
,
783 if (code
== COND_EXPR
)
784 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
785 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
786 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
792 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
797 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
809 /* Returns the structure describing number of iterations determined from
810 EXIT of DATA->current_loop, or NULL if something goes wrong. */
812 static struct tree_niter_desc
*
813 niter_for_exit (struct ivopts_data
*data
, edge exit
)
815 struct tree_niter_desc
*desc
;
820 data
->niters
= pointer_map_create ();
824 slot
= pointer_map_contains (data
->niters
, exit
);
828 /* Try to determine number of iterations. We cannot safely work with ssa
829 names that appear in phi nodes on abnormal edges, so that we do not
830 create overlapping life ranges for them (PR 27283). */
831 desc
= XNEW (struct tree_niter_desc
);
832 if (!number_of_iterations_exit (data
->current_loop
,
834 || contains_abnormal_ssa_name_p (desc
->niter
))
839 slot
= pointer_map_insert (data
->niters
, exit
);
843 desc
= (struct tree_niter_desc
*) *slot
;
848 /* Returns the structure describing number of iterations determined from
849 single dominating exit of DATA->current_loop, or NULL if something
852 static struct tree_niter_desc
*
853 niter_for_single_dom_exit (struct ivopts_data
*data
)
855 edge exit
= single_dom_exit (data
->current_loop
);
860 return niter_for_exit (data
, exit
);
863 /* Initializes data structures used by the iv optimization pass, stored
867 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
869 data
->version_info_size
= 2 * num_ssa_names
;
870 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
871 data
->relevant
= BITMAP_ALLOC (NULL
);
872 data
->important_candidates
= BITMAP_ALLOC (NULL
);
873 data
->max_inv_id
= 0;
875 data
->iv_uses
.create (20);
876 data
->iv_candidates
.create (20);
877 data
->inv_expr_tab
.create (10);
878 data
->inv_expr_id
= 0;
879 decl_rtl_to_reset
.create (20);
882 /* Returns a memory object to that EXPR points. In case we are able to
883 determine that it does not point to any such object, NULL is returned. */
886 determine_base_object (tree expr
)
888 enum tree_code code
= TREE_CODE (expr
);
891 /* If this is a pointer casted to any type, we need to determine
892 the base object for the pointer; so handle conversions before
893 throwing away non-pointer expressions. */
894 if (CONVERT_EXPR_P (expr
))
895 return determine_base_object (TREE_OPERAND (expr
, 0));
897 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
906 obj
= TREE_OPERAND (expr
, 0);
907 base
= get_base_address (obj
);
912 if (TREE_CODE (base
) == MEM_REF
)
913 return determine_base_object (TREE_OPERAND (base
, 0));
915 return fold_convert (ptr_type_node
,
916 build_fold_addr_expr (base
));
918 case POINTER_PLUS_EXPR
:
919 return determine_base_object (TREE_OPERAND (expr
, 0));
923 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
927 return fold_convert (ptr_type_node
, expr
);
931 /* Allocates an induction variable with given initial value BASE and step STEP
935 alloc_iv (tree base
, tree step
)
937 tree base_object
= base
;
938 struct iv
*iv
= XCNEW (struct iv
);
939 gcc_assert (step
!= NULL_TREE
);
941 /* Lower all address expressions except ones with DECL_P as operand.
943 1) More accurate cost can be computed for address expressions;
944 2) Duplicate candidates won't be created for bases in different
945 forms, like &a[0] and &a. */
946 STRIP_NOPS (base_object
);
947 if (TREE_CODE (base_object
) == ADDR_EXPR
948 && !DECL_P (TREE_OPERAND (base_object
, 0)))
952 base_object
= get_inner_reference_aff (TREE_OPERAND (base_object
, 0),
954 gcc_assert (base_object
!= NULL_TREE
);
955 base_object
= build_fold_addr_expr (base_object
);
956 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
960 iv
->base_object
= determine_base_object (base_object
);
963 iv
->have_use_for
= false;
965 iv
->ssa_name
= NULL_TREE
;
970 /* Sets STEP and BASE for induction variable IV. */
973 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
975 struct version_info
*info
= name_info (data
, iv
);
977 gcc_assert (!info
->iv
);
979 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
980 info
->iv
= alloc_iv (base
, step
);
981 info
->iv
->ssa_name
= iv
;
984 /* Finds induction variable declaration for VAR. */
987 get_iv (struct ivopts_data
*data
, tree var
)
990 tree type
= TREE_TYPE (var
);
992 if (!POINTER_TYPE_P (type
)
993 && !INTEGRAL_TYPE_P (type
))
996 if (!name_info (data
, var
)->iv
)
998 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1001 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1002 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1005 return name_info (data
, var
)->iv
;
1008 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1009 not define a simple affine biv with nonzero step. */
1012 determine_biv_step (gimple phi
)
1014 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1015 tree name
= PHI_RESULT (phi
);
1018 if (virtual_operand_p (name
))
1021 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1024 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1027 /* Finds basic ivs. */
1030 find_bivs (struct ivopts_data
*data
)
1033 tree step
, type
, base
;
1035 struct loop
*loop
= data
->current_loop
;
1036 gimple_stmt_iterator psi
;
1038 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1040 phi
= gsi_stmt (psi
);
1042 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1045 step
= determine_biv_step (phi
);
1049 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1050 base
= expand_simple_operations (base
);
1051 if (contains_abnormal_ssa_name_p (base
)
1052 || contains_abnormal_ssa_name_p (step
))
1055 type
= TREE_TYPE (PHI_RESULT (phi
));
1056 base
= fold_convert (type
, base
);
1059 if (POINTER_TYPE_P (type
))
1060 step
= convert_to_ptrofftype (step
);
1062 step
= fold_convert (type
, step
);
1065 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1072 /* Marks basic ivs. */
1075 mark_bivs (struct ivopts_data
*data
)
1079 struct iv
*iv
, *incr_iv
;
1080 struct loop
*loop
= data
->current_loop
;
1081 basic_block incr_bb
;
1082 gimple_stmt_iterator psi
;
1084 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1086 phi
= gsi_stmt (psi
);
1088 iv
= get_iv (data
, PHI_RESULT (phi
));
1092 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1093 def
= SSA_NAME_DEF_STMT (var
);
1094 /* Don't mark iv peeled from other one as biv. */
1096 && gimple_code (def
) == GIMPLE_PHI
1097 && gimple_bb (def
) == loop
->header
)
1100 incr_iv
= get_iv (data
, var
);
1104 /* If the increment is in the subloop, ignore it. */
1105 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1106 if (incr_bb
->loop_father
!= data
->current_loop
1107 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1111 incr_iv
->biv_p
= true;
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116 parameters to IV. */
1119 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1122 struct loop
*loop
= data
->current_loop
;
1124 iv
->base
= NULL_TREE
;
1125 iv
->step
= NULL_TREE
;
1127 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1130 lhs
= gimple_assign_lhs (stmt
);
1131 if (TREE_CODE (lhs
) != SSA_NAME
)
1134 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1136 iv
->base
= expand_simple_operations (iv
->base
);
1138 if (contains_abnormal_ssa_name_p (iv
->base
)
1139 || contains_abnormal_ssa_name_p (iv
->step
))
1142 /* If STMT could throw, then do not consider STMT as defining a GIV.
1143 While this will suppress optimizations, we can not safely delete this
1144 GIV and associated statements, even if it appears it is not used. */
1145 if (stmt_could_throw_p (stmt
))
1151 /* Finds general ivs in statement STMT. */
1154 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1158 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1161 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1164 /* Finds general ivs in basic block BB. */
1167 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1169 gimple_stmt_iterator bsi
;
1171 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1172 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1175 /* Finds general ivs. */
1178 find_givs (struct ivopts_data
*data
)
1180 struct loop
*loop
= data
->current_loop
;
1181 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1184 for (i
= 0; i
< loop
->num_nodes
; i
++)
1185 find_givs_in_bb (data
, body
[i
]);
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190 variable and if so, its initial value and step. */
1193 find_induction_variables (struct ivopts_data
*data
)
1198 if (!find_bivs (data
))
1204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1206 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1210 fprintf (dump_file
, " number of iterations ");
1211 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1212 if (!integer_zerop (niter
->may_be_zero
))
1214 fprintf (dump_file
, "; zero if ");
1215 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1217 fprintf (dump_file
, "\n\n");
1220 fprintf (dump_file
, "Induction variables:\n\n");
1222 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1224 if (ver_info (data
, i
)->iv
)
1225 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1234 static struct iv_use
*
1235 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1236 gimple stmt
, enum use_type use_type
)
1238 struct iv_use
*use
= XCNEW (struct iv_use
);
1240 use
->id
= n_iv_uses (data
);
1241 use
->type
= use_type
;
1245 use
->related_cands
= BITMAP_ALLOC (NULL
);
1247 /* To avoid showing ssa name in the dumps, if it was not reset by the
1249 iv
->ssa_name
= NULL_TREE
;
1251 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1252 dump_use (dump_file
, use
);
1254 data
->iv_uses
.safe_push (use
);
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260 NONLINEAR_USE is true if the invariant is used in a way we do not
1261 handle specially. */
1264 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1267 struct version_info
*info
;
1269 if (TREE_CODE (op
) != SSA_NAME
1270 || virtual_operand_p (op
))
1273 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1275 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1278 info
= name_info (data
, op
);
1280 info
->has_nonlin_use
|= nonlinear_use
;
1282 info
->inv_id
= ++data
->max_inv_id
;
1283 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1286 /* Checks whether the use OP is interesting and if so, records it. */
1288 static struct iv_use
*
1289 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1296 if (TREE_CODE (op
) != SSA_NAME
)
1299 iv
= get_iv (data
, op
);
1303 if (iv
->have_use_for
)
1305 use
= iv_use (data
, iv
->use_id
);
1307 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1311 if (integer_zerop (iv
->step
))
1313 record_invariant (data
, op
, true);
1316 iv
->have_use_for
= true;
1318 civ
= XNEW (struct iv
);
1321 stmt
= SSA_NAME_DEF_STMT (op
);
1322 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1323 || is_gimple_assign (stmt
));
1325 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1326 iv
->use_id
= use
->id
;
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332 of an induction variable and an invariant. If this is the case,
1333 CONTROL_VAR is set to location of the iv, BOUND to the location of
1334 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335 induction variable descriptions, and true is returned. If this is not
1336 the case, CONTROL_VAR and BOUND are set to the arguments of the
1337 condition and false is returned. */
1340 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1341 tree
**control_var
, tree
**bound
,
1342 struct iv
**iv_var
, struct iv
**iv_bound
)
1344 /* The objects returned when COND has constant operands. */
1345 static struct iv const_iv
;
1347 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1348 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1351 if (gimple_code (stmt
) == GIMPLE_COND
)
1353 op0
= gimple_cond_lhs_ptr (stmt
);
1354 op1
= gimple_cond_rhs_ptr (stmt
);
1358 op0
= gimple_assign_rhs1_ptr (stmt
);
1359 op1
= gimple_assign_rhs2_ptr (stmt
);
1362 zero
= integer_zero_node
;
1363 const_iv
.step
= integer_zero_node
;
1365 if (TREE_CODE (*op0
) == SSA_NAME
)
1366 iv0
= get_iv (data
, *op0
);
1367 if (TREE_CODE (*op1
) == SSA_NAME
)
1368 iv1
= get_iv (data
, *op1
);
1370 /* Exactly one of the compared values must be an iv, and the other one must
1375 if (integer_zerop (iv0
->step
))
1377 /* Control variable may be on the other side. */
1378 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1379 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1381 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1385 *control_var
= op0
;;
1396 /* Checks whether the condition in STMT is interesting and if so,
1400 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1402 tree
*var_p
, *bound_p
;
1403 struct iv
*var_iv
, *civ
;
1405 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1407 find_interesting_uses_op (data
, *var_p
);
1408 find_interesting_uses_op (data
, *bound_p
);
1412 civ
= XNEW (struct iv
);
1414 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1417 /* Returns the outermost loop EXPR is obviously invariant in
1418 relative to the loop LOOP, i.e. if all its operands are defined
1419 outside of the returned loop. Returns NULL if EXPR is not
1420 even obviously invariant in LOOP. */
1423 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1428 if (is_gimple_min_invariant (expr
))
1429 return current_loops
->tree_root
;
1431 if (TREE_CODE (expr
) == SSA_NAME
)
1433 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1436 if (flow_bb_inside_loop_p (loop
, def_bb
))
1438 return superloop_at_depth (loop
,
1439 loop_depth (def_bb
->loop_father
) + 1);
1442 return current_loops
->tree_root
;
1448 unsigned maxdepth
= 0;
1449 len
= TREE_OPERAND_LENGTH (expr
);
1450 for (i
= 0; i
< len
; i
++)
1452 struct loop
*ivloop
;
1453 if (!TREE_OPERAND (expr
, i
))
1456 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1459 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1462 return superloop_at_depth (loop
, maxdepth
);
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466 i.e. if all its operands are defined outside of the LOOP. LOOP
1467 should not be the function body. */
1470 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1475 gcc_assert (loop_depth (loop
) > 0);
1477 if (is_gimple_min_invariant (expr
))
1480 if (TREE_CODE (expr
) == SSA_NAME
)
1482 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1484 && flow_bb_inside_loop_p (loop
, def_bb
))
1493 len
= TREE_OPERAND_LENGTH (expr
);
1494 for (i
= 0; i
< len
; i
++)
1495 if (TREE_OPERAND (expr
, i
)
1496 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1502 /* Cumulates the steps of indices into DATA and replaces their values with the
1503 initial ones. Returns false when the value of the index cannot be determined.
1504 Callback for for_each_index. */
1506 struct ifs_ivopts_data
1508 struct ivopts_data
*ivopts_data
;
1514 idx_find_step (tree base
, tree
*idx
, void *data
)
1516 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1518 tree step
, iv_base
, iv_step
, lbound
, off
;
1519 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1521 /* If base is a component ref, require that the offset of the reference
1523 if (TREE_CODE (base
) == COMPONENT_REF
)
1525 off
= component_ref_field_offset (base
);
1526 return expr_invariant_in_loop_p (loop
, off
);
1529 /* If base is array, first check whether we will be able to move the
1530 reference out of the loop (in order to take its address in strength
1531 reduction). In order for this to work we need both lower bound
1532 and step to be loop invariants. */
1533 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1535 /* Moreover, for a range, the size needs to be invariant as well. */
1536 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1537 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1540 step
= array_ref_element_size (base
);
1541 lbound
= array_ref_low_bound (base
);
1543 if (!expr_invariant_in_loop_p (loop
, step
)
1544 || !expr_invariant_in_loop_p (loop
, lbound
))
1548 if (TREE_CODE (*idx
) != SSA_NAME
)
1551 iv
= get_iv (dta
->ivopts_data
, *idx
);
1555 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1556 *&x[0], which is not folded and does not trigger the
1557 ARRAY_REF path below. */
1560 if (integer_zerop (iv
->step
))
1563 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1565 step
= array_ref_element_size (base
);
1567 /* We only handle addresses whose step is an integer constant. */
1568 if (TREE_CODE (step
) != INTEGER_CST
)
1572 /* The step for pointer arithmetics already is 1 byte. */
1573 step
= size_one_node
;
1577 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1578 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1581 /* The index might wrap. */
1585 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1586 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1591 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1592 object is passed to it in DATA. */
1595 idx_record_use (tree base
, tree
*idx
,
1598 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1599 find_interesting_uses_op (data
, *idx
);
1600 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1602 find_interesting_uses_op (data
, array_ref_element_size (base
));
1603 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609 store cst to MUL and return true. Otherwise return false.
1610 The returned value is always sign-extended, regardless of the
1611 signedness of TOP and BOT. */
1614 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1617 enum tree_code code
;
1618 double_int res
, p0
, p1
;
1619 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1624 if (operand_equal_p (top
, bot
, 0))
1626 *mul
= double_int_one
;
1630 code
= TREE_CODE (top
);
1634 mby
= TREE_OPERAND (top
, 1);
1635 if (TREE_CODE (mby
) != INTEGER_CST
)
1638 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1641 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1646 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1647 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1650 if (code
== MINUS_EXPR
)
1652 *mul
= (p0
+ p1
).sext (precision
);
1656 if (TREE_CODE (bot
) != INTEGER_CST
)
1659 p0
= tree_to_double_int (top
).sext (precision
);
1660 p1
= tree_to_double_int (bot
).sext (precision
);
1663 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1664 return res
.is_zero ();
1671 /* Return true if memory reference REF with step STEP may be unaligned. */
1674 may_be_unaligned_p (tree ref
, tree step
)
1676 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1677 thus they are not misaligned. */
1678 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1681 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1682 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1683 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1685 unsigned HOST_WIDE_INT bitpos
;
1686 unsigned int ref_align
;
1687 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1688 if (ref_align
< align
1689 || (bitpos
% align
) != 0
1690 || (bitpos
% BITS_PER_UNIT
) != 0)
1693 unsigned int trailing_zeros
= tree_ctz (step
);
1694 if (trailing_zeros
< HOST_BITS_PER_INT
1695 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1701 /* Return true if EXPR may be non-addressable. */
1704 may_be_nonaddressable_p (tree expr
)
1706 switch (TREE_CODE (expr
))
1708 case TARGET_MEM_REF
:
1709 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1710 target, thus they are always addressable. */
1714 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1715 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1717 case VIEW_CONVERT_EXPR
:
1718 /* This kind of view-conversions may wrap non-addressable objects
1719 and make them look addressable. After some processing the
1720 non-addressability may be uncovered again, causing ADDR_EXPRs
1721 of inappropriate objects to be built. */
1722 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1723 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1726 /* ... fall through ... */
1729 case ARRAY_RANGE_REF
:
1730 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1742 /* Finds addresses in *OP_P inside STMT. */
1745 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1747 tree base
= *op_p
, step
= size_zero_node
;
1749 struct ifs_ivopts_data ifs_ivopts_data
;
1751 /* Do not play with volatile memory references. A bit too conservative,
1752 perhaps, but safe. */
1753 if (gimple_has_volatile_ops (stmt
))
1756 /* Ignore bitfields for now. Not really something terribly complicated
1758 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1761 base
= unshare_expr (base
);
1763 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1765 tree type
= build_pointer_type (TREE_TYPE (base
));
1769 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1771 civ
= get_iv (data
, TMR_BASE (base
));
1775 TMR_BASE (base
) = civ
->base
;
1778 if (TMR_INDEX2 (base
)
1779 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1781 civ
= get_iv (data
, TMR_INDEX2 (base
));
1785 TMR_INDEX2 (base
) = civ
->base
;
1788 if (TMR_INDEX (base
)
1789 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1791 civ
= get_iv (data
, TMR_INDEX (base
));
1795 TMR_INDEX (base
) = civ
->base
;
1800 if (TMR_STEP (base
))
1801 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1803 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1807 if (integer_zerop (step
))
1809 base
= tree_mem_ref_addr (type
, base
);
1813 ifs_ivopts_data
.ivopts_data
= data
;
1814 ifs_ivopts_data
.stmt
= stmt
;
1815 ifs_ivopts_data
.step
= size_zero_node
;
1816 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1817 || integer_zerop (ifs_ivopts_data
.step
))
1819 step
= ifs_ivopts_data
.step
;
1821 /* Check that the base expression is addressable. This needs
1822 to be done after substituting bases of IVs into it. */
1823 if (may_be_nonaddressable_p (base
))
1826 /* Moreover, on strict alignment platforms, check that it is
1827 sufficiently aligned. */
1828 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1831 base
= build_fold_addr_expr (base
);
1833 /* Substituting bases of IVs into the base expression might
1834 have caused folding opportunities. */
1835 if (TREE_CODE (base
) == ADDR_EXPR
)
1837 tree
*ref
= &TREE_OPERAND (base
, 0);
1838 while (handled_component_p (*ref
))
1839 ref
= &TREE_OPERAND (*ref
, 0);
1840 if (TREE_CODE (*ref
) == MEM_REF
)
1842 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1843 TREE_OPERAND (*ref
, 0),
1844 TREE_OPERAND (*ref
, 1));
1851 civ
= alloc_iv (base
, step
);
1852 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1856 for_each_index (op_p
, idx_record_use
, data
);
1859 /* Finds and records invariants used in STMT. */
1862 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1865 use_operand_p use_p
;
1868 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1870 op
= USE_FROM_PTR (use_p
);
1871 record_invariant (data
, op
, false);
1875 /* Finds interesting uses of induction variables in the statement STMT. */
1878 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1881 tree op
, *lhs
, *rhs
;
1883 use_operand_p use_p
;
1884 enum tree_code code
;
1886 find_invariants_stmt (data
, stmt
);
1888 if (gimple_code (stmt
) == GIMPLE_COND
)
1890 find_interesting_uses_cond (data
, stmt
);
1894 if (is_gimple_assign (stmt
))
1896 lhs
= gimple_assign_lhs_ptr (stmt
);
1897 rhs
= gimple_assign_rhs1_ptr (stmt
);
1899 if (TREE_CODE (*lhs
) == SSA_NAME
)
1901 /* If the statement defines an induction variable, the uses are not
1902 interesting by themselves. */
1904 iv
= get_iv (data
, *lhs
);
1906 if (iv
&& !integer_zerop (iv
->step
))
1910 code
= gimple_assign_rhs_code (stmt
);
1911 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1912 && (REFERENCE_CLASS_P (*rhs
)
1913 || is_gimple_val (*rhs
)))
1915 if (REFERENCE_CLASS_P (*rhs
))
1916 find_interesting_uses_address (data
, stmt
, rhs
);
1918 find_interesting_uses_op (data
, *rhs
);
1920 if (REFERENCE_CLASS_P (*lhs
))
1921 find_interesting_uses_address (data
, stmt
, lhs
);
1924 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1926 find_interesting_uses_cond (data
, stmt
);
1930 /* TODO -- we should also handle address uses of type
1932 memory = call (whatever);
1939 if (gimple_code (stmt
) == GIMPLE_PHI
1940 && gimple_bb (stmt
) == data
->current_loop
->header
)
1942 iv
= get_iv (data
, PHI_RESULT (stmt
));
1944 if (iv
&& !integer_zerop (iv
->step
))
1948 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1950 op
= USE_FROM_PTR (use_p
);
1952 if (TREE_CODE (op
) != SSA_NAME
)
1955 iv
= get_iv (data
, op
);
1959 find_interesting_uses_op (data
, op
);
1963 /* Finds interesting uses of induction variables outside of loops
1964 on loop exit edge EXIT. */
1967 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1970 gimple_stmt_iterator psi
;
1973 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1975 phi
= gsi_stmt (psi
);
1976 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1977 if (!virtual_operand_p (def
))
1978 find_interesting_uses_op (data
, def
);
1982 /* Finds uses of the induction variables that are interesting. */
1985 find_interesting_uses (struct ivopts_data
*data
)
1988 gimple_stmt_iterator bsi
;
1989 basic_block
*body
= get_loop_body (data
->current_loop
);
1991 struct version_info
*info
;
1994 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1995 fprintf (dump_file
, "Uses:\n\n");
1997 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2002 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2003 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2004 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2005 find_interesting_uses_outside (data
, e
);
2007 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2008 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2009 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2010 if (!is_gimple_debug (gsi_stmt (bsi
)))
2011 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2018 fprintf (dump_file
, "\n");
2020 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2022 info
= ver_info (data
, i
);
2025 fprintf (dump_file
, " ");
2026 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2027 fprintf (dump_file
, " is invariant (%d)%s\n",
2028 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2032 fprintf (dump_file
, "\n");
2038 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2039 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2040 we are at the top-level of the processed address. */
2043 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2044 HOST_WIDE_INT
*offset
)
2046 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2047 enum tree_code code
;
2048 tree type
, orig_type
= TREE_TYPE (expr
);
2049 HOST_WIDE_INT off0
, off1
, st
;
2050 tree orig_expr
= expr
;
2054 type
= TREE_TYPE (expr
);
2055 code
= TREE_CODE (expr
);
2061 if (!cst_and_fits_in_hwi (expr
)
2062 || integer_zerop (expr
))
2065 *offset
= int_cst_value (expr
);
2066 return build_int_cst (orig_type
, 0);
2068 case POINTER_PLUS_EXPR
:
2071 op0
= TREE_OPERAND (expr
, 0);
2072 op1
= TREE_OPERAND (expr
, 1);
2074 op0
= strip_offset_1 (op0
, false, false, &off0
);
2075 op1
= strip_offset_1 (op1
, false, false, &off1
);
2077 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2078 if (op0
== TREE_OPERAND (expr
, 0)
2079 && op1
== TREE_OPERAND (expr
, 1))
2082 if (integer_zerop (op1
))
2084 else if (integer_zerop (op0
))
2086 if (code
== MINUS_EXPR
)
2087 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2092 expr
= fold_build2 (code
, type
, op0
, op1
);
2094 return fold_convert (orig_type
, expr
);
2097 op1
= TREE_OPERAND (expr
, 1);
2098 if (!cst_and_fits_in_hwi (op1
))
2101 op0
= TREE_OPERAND (expr
, 0);
2102 op0
= strip_offset_1 (op0
, false, false, &off0
);
2103 if (op0
== TREE_OPERAND (expr
, 0))
2106 *offset
= off0
* int_cst_value (op1
);
2107 if (integer_zerop (op0
))
2110 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2112 return fold_convert (orig_type
, expr
);
2115 case ARRAY_RANGE_REF
:
2119 step
= array_ref_element_size (expr
);
2120 if (!cst_and_fits_in_hwi (step
))
2123 st
= int_cst_value (step
);
2124 op1
= TREE_OPERAND (expr
, 1);
2125 op1
= strip_offset_1 (op1
, false, false, &off1
);
2126 *offset
= off1
* st
;
2129 && integer_zerop (op1
))
2131 /* Strip the component reference completely. */
2132 op0
= TREE_OPERAND (expr
, 0);
2133 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2146 tmp
= component_ref_field_offset (expr
);
2147 field
= TREE_OPERAND (expr
, 1);
2149 && cst_and_fits_in_hwi (tmp
)
2150 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2152 HOST_WIDE_INT boffset
, abs_off
;
2154 /* Strip the component reference completely. */
2155 op0
= TREE_OPERAND (expr
, 0);
2156 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2157 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2158 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2162 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2169 op0
= TREE_OPERAND (expr
, 0);
2170 op0
= strip_offset_1 (op0
, true, true, &off0
);
2173 if (op0
== TREE_OPERAND (expr
, 0))
2176 expr
= build_fold_addr_expr (op0
);
2177 return fold_convert (orig_type
, expr
);
2180 /* ??? Offset operand? */
2181 inside_addr
= false;
2188 /* Default handling of expressions for that we want to recurse into
2189 the first operand. */
2190 op0
= TREE_OPERAND (expr
, 0);
2191 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2194 if (op0
== TREE_OPERAND (expr
, 0)
2195 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2198 expr
= copy_node (expr
);
2199 TREE_OPERAND (expr
, 0) = op0
;
2201 TREE_OPERAND (expr
, 1) = op1
;
2203 /* Inside address, we might strip the top level component references,
2204 thus changing type of the expression. Handling of ADDR_EXPR
2206 expr
= fold_convert (orig_type
, expr
);
2211 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2214 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2217 tree core
= strip_offset_1 (expr
, false, false, &off
);
2222 /* Returns variant of TYPE that can be used as base for different uses.
2223 We return unsigned type with the same precision, which avoids problems
2227 generic_type_for (tree type
)
2229 if (POINTER_TYPE_P (type
))
2230 return unsigned_type_for (type
);
2232 if (TYPE_UNSIGNED (type
))
2235 return unsigned_type_for (type
);
2238 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2239 the bitmap to that we should store it. */
2241 static struct ivopts_data
*fd_ivopts_data
;
2243 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2245 bitmap
*depends_on
= (bitmap
*) data
;
2246 struct version_info
*info
;
2248 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2250 info
= name_info (fd_ivopts_data
, *expr_p
);
2252 if (!info
->inv_id
|| info
->has_nonlin_use
)
2256 *depends_on
= BITMAP_ALLOC (NULL
);
2257 bitmap_set_bit (*depends_on
, info
->inv_id
);
2262 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2263 position to POS. If USE is not NULL, the candidate is set as related to
2264 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2265 replacement of the final value of the iv by a direct computation. */
2267 static struct iv_cand
*
2268 add_candidate_1 (struct ivopts_data
*data
,
2269 tree base
, tree step
, bool important
, enum iv_position pos
,
2270 struct iv_use
*use
, gimple incremented_at
)
2273 struct iv_cand
*cand
= NULL
;
2274 tree type
, orig_type
;
2276 /* For non-original variables, make sure their values are computed in a type
2277 that does not invoke undefined behavior on overflows (since in general,
2278 we cannot prove that these induction variables are non-wrapping). */
2279 if (pos
!= IP_ORIGINAL
)
2281 orig_type
= TREE_TYPE (base
);
2282 type
= generic_type_for (orig_type
);
2283 if (type
!= orig_type
)
2285 base
= fold_convert (type
, base
);
2286 step
= fold_convert (type
, step
);
2290 for (i
= 0; i
< n_iv_cands (data
); i
++)
2292 cand
= iv_cand (data
, i
);
2294 if (cand
->pos
!= pos
)
2297 if (cand
->incremented_at
!= incremented_at
2298 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2299 && cand
->ainc_use
!= use
))
2313 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2314 && operand_equal_p (step
, cand
->iv
->step
, 0)
2315 && (TYPE_PRECISION (TREE_TYPE (base
))
2316 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2320 if (i
== n_iv_cands (data
))
2322 cand
= XCNEW (struct iv_cand
);
2328 cand
->iv
= alloc_iv (base
, step
);
2331 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2333 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2334 cand
->var_after
= cand
->var_before
;
2336 cand
->important
= important
;
2337 cand
->incremented_at
= incremented_at
;
2338 data
->iv_candidates
.safe_push (cand
);
2341 && TREE_CODE (step
) != INTEGER_CST
)
2343 fd_ivopts_data
= data
;
2344 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2347 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2348 cand
->ainc_use
= use
;
2350 cand
->ainc_use
= NULL
;
2352 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2353 dump_cand (dump_file
, cand
);
2356 if (important
&& !cand
->important
)
2358 cand
->important
= true;
2359 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2360 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2365 bitmap_set_bit (use
->related_cands
, i
);
2366 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2367 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2374 /* Returns true if incrementing the induction variable at the end of the LOOP
2377 The purpose is to avoid splitting latch edge with a biv increment, thus
2378 creating a jump, possibly confusing other optimization passes and leaving
2379 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2380 is not available (so we do not have a better alternative), or if the latch
2381 edge is already nonempty. */
2384 allow_ip_end_pos_p (struct loop
*loop
)
2386 if (!ip_normal_pos (loop
))
2389 if (!empty_block_p (ip_end_pos (loop
)))
2395 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2396 Important field is set to IMPORTANT. */
2399 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2400 bool important
, struct iv_use
*use
)
2402 basic_block use_bb
= gimple_bb (use
->stmt
);
2403 enum machine_mode mem_mode
;
2404 unsigned HOST_WIDE_INT cstepi
;
2406 /* If we insert the increment in any position other than the standard
2407 ones, we must ensure that it is incremented once per iteration.
2408 It must not be in an inner nested loop, or one side of an if
2410 if (use_bb
->loop_father
!= data
->current_loop
2411 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2412 || stmt_could_throw_p (use
->stmt
)
2413 || !cst_and_fits_in_hwi (step
))
2416 cstepi
= int_cst_value (step
);
2418 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2419 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2420 || USE_STORE_PRE_INCREMENT (mem_mode
))
2421 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2422 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2423 || USE_STORE_PRE_DECREMENT (mem_mode
))
2424 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2426 enum tree_code code
= MINUS_EXPR
;
2428 tree new_step
= step
;
2430 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2432 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2433 code
= POINTER_PLUS_EXPR
;
2436 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2437 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2438 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2441 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2442 || USE_STORE_POST_INCREMENT (mem_mode
))
2443 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2444 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2445 || USE_STORE_POST_DECREMENT (mem_mode
))
2446 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2448 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2453 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2454 position to POS. If USE is not NULL, the candidate is set as related to
2455 it. The candidate computation is scheduled on all available positions. */
2458 add_candidate (struct ivopts_data
*data
,
2459 tree base
, tree step
, bool important
, struct iv_use
*use
)
2461 if (ip_normal_pos (data
->current_loop
))
2462 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2463 if (ip_end_pos (data
->current_loop
)
2464 && allow_ip_end_pos_p (data
->current_loop
))
2465 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2467 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2468 add_autoinc_candidates (data
, base
, step
, important
, use
);
2471 /* Adds standard iv candidates. */
2474 add_standard_iv_candidates (struct ivopts_data
*data
)
2476 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2478 /* The same for a double-integer type if it is still fast enough. */
2480 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2481 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2482 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2483 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2485 /* The same for a double-integer type if it is still fast enough. */
2487 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2488 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2489 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2490 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2494 /* Adds candidates bases on the old induction variable IV. */
2497 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2501 struct iv_cand
*cand
;
2503 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2505 /* The same, but with initial value zero. */
2506 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2507 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2509 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2510 iv
->step
, true, NULL
);
2512 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2513 if (gimple_code (phi
) == GIMPLE_PHI
)
2515 /* Additionally record the possibility of leaving the original iv
2517 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2518 /* Don't add candidate if it's from another PHI node because
2519 it's an affine iv appearing in the form of PEELED_CHREC. */
2520 phi
= SSA_NAME_DEF_STMT (def
);
2521 if (gimple_code (phi
) != GIMPLE_PHI
)
2523 cand
= add_candidate_1 (data
,
2524 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2525 SSA_NAME_DEF_STMT (def
));
2526 cand
->var_before
= iv
->ssa_name
;
2527 cand
->var_after
= def
;
2530 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2534 /* Adds candidates based on the old induction variables. */
2537 add_old_ivs_candidates (struct ivopts_data
*data
)
2543 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2545 iv
= ver_info (data
, i
)->iv
;
2546 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2547 add_old_iv_candidates (data
, iv
);
2551 /* Adds candidates based on the value of the induction variable IV and USE. */
2554 add_iv_value_candidates (struct ivopts_data
*data
,
2555 struct iv
*iv
, struct iv_use
*use
)
2557 unsigned HOST_WIDE_INT offset
;
2561 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2563 /* The same, but with initial value zero. Make such variable important,
2564 since it is generic enough so that possibly many uses may be based
2566 basetype
= TREE_TYPE (iv
->base
);
2567 if (POINTER_TYPE_P (basetype
))
2568 basetype
= sizetype
;
2569 add_candidate (data
, build_int_cst (basetype
, 0),
2570 iv
->step
, true, use
);
2572 /* Third, try removing the constant offset. Make sure to even
2573 add a candidate for &a[0] vs. (T *)&a. */
2574 base
= strip_offset (iv
->base
, &offset
);
2576 || base
!= iv
->base
)
2577 add_candidate (data
, base
, iv
->step
, false, use
);
2580 /* Adds candidates based on the uses. */
2583 add_derived_ivs_candidates (struct ivopts_data
*data
)
2587 for (i
= 0; i
< n_iv_uses (data
); i
++)
2589 struct iv_use
*use
= iv_use (data
, i
);
2596 case USE_NONLINEAR_EXPR
:
2599 /* Just add the ivs based on the value of the iv used here. */
2600 add_iv_value_candidates (data
, use
->iv
, use
);
2609 /* Record important candidates and add them to related_cands bitmaps
2613 record_important_candidates (struct ivopts_data
*data
)
2618 for (i
= 0; i
< n_iv_cands (data
); i
++)
2620 struct iv_cand
*cand
= iv_cand (data
, i
);
2622 if (cand
->important
)
2623 bitmap_set_bit (data
->important_candidates
, i
);
2626 data
->consider_all_candidates
= (n_iv_cands (data
)
2627 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2629 if (data
->consider_all_candidates
)
2631 /* We will not need "related_cands" bitmaps in this case,
2632 so release them to decrease peak memory consumption. */
2633 for (i
= 0; i
< n_iv_uses (data
); i
++)
2635 use
= iv_use (data
, i
);
2636 BITMAP_FREE (use
->related_cands
);
2641 /* Add important candidates to the related_cands bitmaps. */
2642 for (i
= 0; i
< n_iv_uses (data
); i
++)
2643 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2644 data
->important_candidates
);
2648 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2649 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2650 we allocate a simple list to every use. */
2653 alloc_use_cost_map (struct ivopts_data
*data
)
2655 unsigned i
, size
, s
;
2657 for (i
= 0; i
< n_iv_uses (data
); i
++)
2659 struct iv_use
*use
= iv_use (data
, i
);
2661 if (data
->consider_all_candidates
)
2662 size
= n_iv_cands (data
);
2665 s
= bitmap_count_bits (use
->related_cands
);
2667 /* Round up to the power of two, so that moduling by it is fast. */
2668 size
= s
? (1 << ceil_log2 (s
)) : 1;
2671 use
->n_map_members
= size
;
2672 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2676 /* Returns description of computation cost of expression whose runtime
2677 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2680 new_cost (unsigned runtime
, unsigned complexity
)
2684 cost
.cost
= runtime
;
2685 cost
.complexity
= complexity
;
2690 /* Adds costs COST1 and COST2. */
2693 add_costs (comp_cost cost1
, comp_cost cost2
)
2695 cost1
.cost
+= cost2
.cost
;
2696 cost1
.complexity
+= cost2
.complexity
;
2700 /* Subtracts costs COST1 and COST2. */
2703 sub_costs (comp_cost cost1
, comp_cost cost2
)
2705 cost1
.cost
-= cost2
.cost
;
2706 cost1
.complexity
-= cost2
.complexity
;
2711 /* Returns a negative number if COST1 < COST2, a positive number if
2712 COST1 > COST2, and 0 if COST1 = COST2. */
2715 compare_costs (comp_cost cost1
, comp_cost cost2
)
2717 if (cost1
.cost
== cost2
.cost
)
2718 return cost1
.complexity
- cost2
.complexity
;
2720 return cost1
.cost
- cost2
.cost
;
2723 /* Returns true if COST is infinite. */
2726 infinite_cost_p (comp_cost cost
)
2728 return cost
.cost
== INFTY
;
2731 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2732 on invariants DEPENDS_ON and that the value used in expressing it
2733 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2736 set_use_iv_cost (struct ivopts_data
*data
,
2737 struct iv_use
*use
, struct iv_cand
*cand
,
2738 comp_cost cost
, bitmap depends_on
, tree value
,
2739 enum tree_code comp
, int inv_expr_id
)
2743 if (infinite_cost_p (cost
))
2745 BITMAP_FREE (depends_on
);
2749 if (data
->consider_all_candidates
)
2751 use
->cost_map
[cand
->id
].cand
= cand
;
2752 use
->cost_map
[cand
->id
].cost
= cost
;
2753 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2754 use
->cost_map
[cand
->id
].value
= value
;
2755 use
->cost_map
[cand
->id
].comp
= comp
;
2756 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2760 /* n_map_members is a power of two, so this computes modulo. */
2761 s
= cand
->id
& (use
->n_map_members
- 1);
2762 for (i
= s
; i
< use
->n_map_members
; i
++)
2763 if (!use
->cost_map
[i
].cand
)
2765 for (i
= 0; i
< s
; i
++)
2766 if (!use
->cost_map
[i
].cand
)
2772 use
->cost_map
[i
].cand
= cand
;
2773 use
->cost_map
[i
].cost
= cost
;
2774 use
->cost_map
[i
].depends_on
= depends_on
;
2775 use
->cost_map
[i
].value
= value
;
2776 use
->cost_map
[i
].comp
= comp
;
2777 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2780 /* Gets cost of (USE, CANDIDATE) pair. */
2782 static struct cost_pair
*
2783 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2784 struct iv_cand
*cand
)
2787 struct cost_pair
*ret
;
2792 if (data
->consider_all_candidates
)
2794 ret
= use
->cost_map
+ cand
->id
;
2801 /* n_map_members is a power of two, so this computes modulo. */
2802 s
= cand
->id
& (use
->n_map_members
- 1);
2803 for (i
= s
; i
< use
->n_map_members
; i
++)
2804 if (use
->cost_map
[i
].cand
== cand
)
2805 return use
->cost_map
+ i
;
2806 else if (use
->cost_map
[i
].cand
== NULL
)
2808 for (i
= 0; i
< s
; i
++)
2809 if (use
->cost_map
[i
].cand
== cand
)
2810 return use
->cost_map
+ i
;
2811 else if (use
->cost_map
[i
].cand
== NULL
)
2817 /* Returns estimate on cost of computing SEQ. */
2820 seq_cost (rtx seq
, bool speed
)
2825 for (; seq
; seq
= NEXT_INSN (seq
))
2827 set
= single_set (seq
);
2829 cost
+= set_src_cost (SET_SRC (set
), speed
);
2837 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2839 produce_memory_decl_rtl (tree obj
, int *regno
)
2841 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2842 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2846 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2848 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2849 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2850 SET_SYMBOL_REF_DECL (x
, obj
);
2851 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2852 set_mem_addr_space (x
, as
);
2853 targetm
.encode_section_info (obj
, x
, true);
2857 x
= gen_raw_REG (address_mode
, (*regno
)++);
2858 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2859 set_mem_addr_space (x
, as
);
2865 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2866 walk_tree. DATA contains the actual fake register number. */
2869 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2871 tree obj
= NULL_TREE
;
2873 int *regno
= (int *) data
;
2875 switch (TREE_CODE (*expr_p
))
2878 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2879 handled_component_p (*expr_p
);
2880 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2883 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2884 x
= produce_memory_decl_rtl (obj
, regno
);
2889 obj
= SSA_NAME_VAR (*expr_p
);
2890 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2893 if (!DECL_RTL_SET_P (obj
))
2894 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2903 if (DECL_RTL_SET_P (obj
))
2906 if (DECL_MODE (obj
) == BLKmode
)
2907 x
= produce_memory_decl_rtl (obj
, regno
);
2909 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2919 decl_rtl_to_reset
.safe_push (obj
);
2920 SET_DECL_RTL (obj
, x
);
2926 /* Determines cost of the computation of EXPR. */
2929 computation_cost (tree expr
, bool speed
)
2932 tree type
= TREE_TYPE (expr
);
2934 /* Avoid using hard regs in ways which may be unsupported. */
2935 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2936 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2937 enum node_frequency real_frequency
= node
->frequency
;
2939 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2940 crtl
->maybe_hot_insn_p
= speed
;
2941 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2943 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2946 default_rtl_profile ();
2947 node
->frequency
= real_frequency
;
2949 cost
= seq_cost (seq
, speed
);
2951 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2952 TYPE_ADDR_SPACE (type
), speed
);
2953 else if (!REG_P (rslt
))
2954 cost
+= set_src_cost (rslt
, speed
);
2959 /* Returns variable containing the value of candidate CAND at statement AT. */
2962 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2964 if (stmt_after_increment (loop
, cand
, stmt
))
2965 return cand
->var_after
;
2967 return cand
->var_before
;
2970 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2971 same precision that is at least as wide as the precision of TYPE, stores
2972 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2976 determine_common_wider_type (tree
*a
, tree
*b
)
2978 tree wider_type
= NULL
;
2980 tree atype
= TREE_TYPE (*a
);
2982 if (CONVERT_EXPR_P (*a
))
2984 suba
= TREE_OPERAND (*a
, 0);
2985 wider_type
= TREE_TYPE (suba
);
2986 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2992 if (CONVERT_EXPR_P (*b
))
2994 subb
= TREE_OPERAND (*b
, 0);
2995 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3006 /* Determines the expression by that USE is expressed from induction variable
3007 CAND at statement AT in LOOP. The expression is stored in a decomposed
3008 form into AFF. Returns false if USE cannot be expressed using CAND. */
3011 get_computation_aff (struct loop
*loop
,
3012 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3013 struct aff_tree
*aff
)
3015 tree ubase
= use
->iv
->base
;
3016 tree ustep
= use
->iv
->step
;
3017 tree cbase
= cand
->iv
->base
;
3018 tree cstep
= cand
->iv
->step
, cstep_common
;
3019 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3020 tree common_type
, var
;
3022 aff_tree cbase_aff
, var_aff
;
3025 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3027 /* We do not have a precision to express the values of use. */
3031 var
= var_at_stmt (loop
, cand
, at
);
3032 uutype
= unsigned_type_for (utype
);
3034 /* If the conversion is not noop, perform it. */
3035 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3037 cstep
= fold_convert (uutype
, cstep
);
3038 cbase
= fold_convert (uutype
, cbase
);
3039 var
= fold_convert (uutype
, var
);
3042 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3045 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3046 type, we achieve better folding by computing their difference in this
3047 wider type, and cast the result to UUTYPE. We do not need to worry about
3048 overflows, as all the arithmetics will in the end be performed in UUTYPE
3050 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3052 /* use = ubase - ratio * cbase + ratio * var. */
3053 tree_to_aff_combination (ubase
, common_type
, aff
);
3054 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3055 tree_to_aff_combination (var
, uutype
, &var_aff
);
3057 /* We need to shift the value if we are after the increment. */
3058 if (stmt_after_increment (loop
, cand
, at
))
3062 if (common_type
!= uutype
)
3063 cstep_common
= fold_convert (common_type
, cstep
);
3065 cstep_common
= cstep
;
3067 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3068 aff_combination_add (&cbase_aff
, &cstep_aff
);
3071 aff_combination_scale (&cbase_aff
, -rat
);
3072 aff_combination_add (aff
, &cbase_aff
);
3073 if (common_type
!= uutype
)
3074 aff_combination_convert (aff
, uutype
);
3076 aff_combination_scale (&var_aff
, rat
);
3077 aff_combination_add (aff
, &var_aff
);
3082 /* Return the type of USE. */
3085 get_use_type (struct iv_use
*use
)
3087 tree base_type
= TREE_TYPE (use
->iv
->base
);
3090 if (use
->type
== USE_ADDRESS
)
3092 /* The base_type may be a void pointer. Create a pointer type based on
3093 the mem_ref instead. */
3094 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3095 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3096 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3104 /* Determines the expression by that USE is expressed from induction variable
3105 CAND at statement AT in LOOP. The computation is unshared. */
3108 get_computation_at (struct loop
*loop
,
3109 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3112 tree type
= get_use_type (use
);
3114 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3116 unshare_aff_combination (&aff
);
3117 return fold_convert (type
, aff_combination_to_tree (&aff
));
3120 /* Determines the expression by that USE is expressed from induction variable
3121 CAND in LOOP. The computation is unshared. */
3124 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3126 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3129 /* Adjust the cost COST for being in loop setup rather than loop body.
3130 If we're optimizing for space, the loop setup overhead is constant;
3131 if we're optimizing for speed, amortize it over the per-iteration cost. */
3133 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3137 else if (optimize_loop_for_speed_p (data
->current_loop
))
3138 return cost
/ avg_loop_niter (data
->current_loop
);
3143 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3144 validity for a memory reference accessing memory of mode MODE in
3145 address space AS. */
3149 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3152 #define MAX_RATIO 128
3153 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3154 static vec
<sbitmap
> valid_mult_list
;
3157 if (data_index
>= valid_mult_list
.length ())
3158 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3160 valid_mult
= valid_mult_list
[data_index
];
3163 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3164 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3165 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3169 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3170 bitmap_clear (valid_mult
);
3171 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3172 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3173 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3175 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3176 if (memory_address_addr_space_p (mode
, addr
, as
)
3177 || memory_address_addr_space_p (mode
, scaled
, as
))
3178 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3181 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3183 fprintf (dump_file
, " allowed multipliers:");
3184 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3185 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3186 fprintf (dump_file
, " %d", (int) i
);
3187 fprintf (dump_file
, "\n");
3188 fprintf (dump_file
, "\n");
3191 valid_mult_list
[data_index
] = valid_mult
;
3194 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3197 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3200 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3201 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3202 variable is omitted. Compute the cost for a memory reference that accesses
3203 a memory location of mode MEM_MODE in address space AS.
3205 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3206 size of MEM_MODE / RATIO) is available. To make this determination, we
3207 look at the size of the increment to be made, which is given in CSTEP.
3208 CSTEP may be zero if the step is unknown.
3209 STMT_AFTER_INC is true iff the statement we're looking at is after the
3210 increment of the original biv.
3212 TODO -- there must be some better way. This all is quite crude. */
3216 AINC_PRE_INC
, /* Pre increment. */
3217 AINC_PRE_DEC
, /* Pre decrement. */
3218 AINC_POST_INC
, /* Post increment. */
3219 AINC_POST_DEC
, /* Post decrement. */
3220 AINC_NONE
/* Also the number of auto increment types. */
3223 typedef struct address_cost_data_s
3225 HOST_WIDE_INT min_offset
, max_offset
;
3226 unsigned costs
[2][2][2][2];
3227 unsigned ainc_costs
[AINC_NONE
];
3228 } *address_cost_data
;
3232 get_address_cost (bool symbol_present
, bool var_present
,
3233 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3234 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3235 addr_space_t as
, bool speed
,
3236 bool stmt_after_inc
, bool *may_autoinc
)
3238 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3239 static vec
<address_cost_data
> address_cost_data_list
;
3240 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3241 address_cost_data data
;
3242 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3243 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3244 unsigned cost
, acost
, complexity
;
3245 enum ainc_type autoinc_type
;
3246 bool offset_p
, ratio_p
, autoinc
;
3247 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3248 unsigned HOST_WIDE_INT mask
;
3251 if (data_index
>= address_cost_data_list
.length ())
3252 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3254 data
= address_cost_data_list
[data_index
];
3258 HOST_WIDE_INT rat
, off
= 0;
3259 int old_cse_not_expected
, width
;
3260 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3261 rtx seq
, addr
, base
;
3264 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3266 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3268 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3269 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3270 width
= HOST_BITS_PER_WIDE_INT
- 1;
3271 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3273 for (i
= width
; i
>= 0; i
--)
3275 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3276 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3277 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3280 data
->min_offset
= (i
== -1? 0 : off
);
3282 for (i
= width
; i
>= 0; i
--)
3284 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3285 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3286 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3291 data
->max_offset
= off
;
3293 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3295 fprintf (dump_file
, "get_address_cost:\n");
3296 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3297 GET_MODE_NAME (mem_mode
),
3299 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3300 GET_MODE_NAME (mem_mode
),
3305 for (i
= 2; i
<= MAX_RATIO
; i
++)
3306 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3312 /* Compute the cost of various addressing modes. */
3314 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3315 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3317 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3318 || USE_STORE_PRE_DECREMENT (mem_mode
))
3320 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3321 has_predec
[mem_mode
]
3322 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3324 if (has_predec
[mem_mode
])
3325 data
->ainc_costs
[AINC_PRE_DEC
]
3326 = address_cost (addr
, mem_mode
, as
, speed
);
3328 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3329 || USE_STORE_POST_DECREMENT (mem_mode
))
3331 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3332 has_postdec
[mem_mode
]
3333 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3335 if (has_postdec
[mem_mode
])
3336 data
->ainc_costs
[AINC_POST_DEC
]
3337 = address_cost (addr
, mem_mode
, as
, speed
);
3339 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3340 || USE_STORE_PRE_DECREMENT (mem_mode
))
3342 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3343 has_preinc
[mem_mode
]
3344 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3346 if (has_preinc
[mem_mode
])
3347 data
->ainc_costs
[AINC_PRE_INC
]
3348 = address_cost (addr
, mem_mode
, as
, speed
);
3350 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3351 || USE_STORE_POST_INCREMENT (mem_mode
))
3353 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3354 has_postinc
[mem_mode
]
3355 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3357 if (has_postinc
[mem_mode
])
3358 data
->ainc_costs
[AINC_POST_INC
]
3359 = address_cost (addr
, mem_mode
, as
, speed
);
3361 for (i
= 0; i
< 16; i
++)
3364 var_p
= (i
>> 1) & 1;
3365 off_p
= (i
>> 2) & 1;
3366 rat_p
= (i
>> 3) & 1;
3370 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3371 gen_int_mode (rat
, address_mode
));
3374 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3378 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3379 /* ??? We can run into trouble with some backends by presenting
3380 it with symbols which haven't been properly passed through
3381 targetm.encode_section_info. By setting the local bit, we
3382 enhance the probability of things working. */
3383 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3386 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3388 (PLUS
, address_mode
, base
,
3389 gen_int_mode (off
, address_mode
)));
3392 base
= gen_int_mode (off
, address_mode
);
3397 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3400 /* To avoid splitting addressing modes, pretend that no cse will
3402 old_cse_not_expected
= cse_not_expected
;
3403 cse_not_expected
= true;
3404 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3405 cse_not_expected
= old_cse_not_expected
;
3409 acost
= seq_cost (seq
, speed
);
3410 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3414 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3417 /* On some targets, it is quite expensive to load symbol to a register,
3418 which makes addresses that contain symbols look much more expensive.
3419 However, the symbol will have to be loaded in any case before the
3420 loop (and quite likely we have it in register already), so it does not
3421 make much sense to penalize them too heavily. So make some final
3422 tweaks for the SYMBOL_PRESENT modes:
3424 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3425 var is cheaper, use this mode with small penalty.
3426 If VAR_PRESENT is true, try whether the mode with
3427 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3428 if this is the case, use it. */
3429 add_c
= add_cost (speed
, address_mode
);
3430 for (i
= 0; i
< 8; i
++)
3433 off_p
= (i
>> 1) & 1;
3434 rat_p
= (i
>> 2) & 1;
3436 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3440 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3441 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3444 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3446 fprintf (dump_file
, "Address costs:\n");
3448 for (i
= 0; i
< 16; i
++)
3451 var_p
= (i
>> 1) & 1;
3452 off_p
= (i
>> 2) & 1;
3453 rat_p
= (i
>> 3) & 1;
3455 fprintf (dump_file
, " ");
3457 fprintf (dump_file
, "sym + ");
3459 fprintf (dump_file
, "var + ");
3461 fprintf (dump_file
, "cst + ");
3463 fprintf (dump_file
, "rat * ");
3465 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3466 fprintf (dump_file
, "index costs %d\n", acost
);
3468 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3469 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3470 fprintf (dump_file
, " May include autoinc/dec\n");
3471 fprintf (dump_file
, "\n");
3474 address_cost_data_list
[data_index
] = data
;
3477 bits
= GET_MODE_BITSIZE (address_mode
);
3478 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3480 if ((offset
>> (bits
- 1) & 1))
3485 autoinc_type
= AINC_NONE
;
3486 msize
= GET_MODE_SIZE (mem_mode
);
3487 autoinc_offset
= offset
;
3489 autoinc_offset
+= ratio
* cstep
;
3490 if (symbol_present
|| var_present
|| ratio
!= 1)
3494 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3496 autoinc_type
= AINC_POST_INC
;
3497 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3499 autoinc_type
= AINC_POST_DEC
;
3500 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3502 autoinc_type
= AINC_PRE_INC
;
3503 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3505 autoinc_type
= AINC_PRE_DEC
;
3507 if (autoinc_type
!= AINC_NONE
)
3512 offset_p
= (s_offset
!= 0
3513 && data
->min_offset
<= s_offset
3514 && s_offset
<= data
->max_offset
);
3515 ratio_p
= (ratio
!= 1
3516 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3518 if (ratio
!= 1 && !ratio_p
)
3519 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3521 if (s_offset
&& !offset_p
&& !symbol_present
)
3522 cost
+= add_cost (speed
, address_mode
);
3525 *may_autoinc
= autoinc
;
3527 acost
= data
->ainc_costs
[autoinc_type
];
3529 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3530 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3531 return new_cost (cost
+ acost
, complexity
);
3534 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3535 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3536 calculating the operands of EXPR. Returns true if successful, and returns
3537 the cost in COST. */
3540 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3541 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3544 tree op1
= TREE_OPERAND (expr
, 1);
3545 tree cst
= TREE_OPERAND (mult
, 1);
3546 tree multop
= TREE_OPERAND (mult
, 0);
3547 int m
= exact_log2 (int_cst_value (cst
));
3548 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3550 bool equal_p
= false;
3552 if (!(m
>= 0 && m
< maxm
))
3555 if (operand_equal_p (op1
, mult
, 0))
3558 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3559 ? shiftadd_cost (speed
, mode
, m
)
3561 ? shiftsub1_cost (speed
, mode
, m
)
3562 : shiftsub0_cost (speed
, mode
, m
)));
3563 res
= new_cost (sa_cost
, 0);
3564 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3566 STRIP_NOPS (multop
);
3567 if (!is_gimple_val (multop
))
3568 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3574 /* Estimates cost of forcing expression EXPR into a variable. */
3577 force_expr_to_var_cost (tree expr
, bool speed
)
3579 static bool costs_initialized
= false;
3580 static unsigned integer_cost
[2];
3581 static unsigned symbol_cost
[2];
3582 static unsigned address_cost
[2];
3584 comp_cost cost0
, cost1
, cost
;
3585 enum machine_mode mode
;
3587 if (!costs_initialized
)
3589 tree type
= build_pointer_type (integer_type_node
);
3594 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3595 TREE_STATIC (var
) = 1;
3596 x
= produce_memory_decl_rtl (var
, NULL
);
3597 SET_DECL_RTL (var
, x
);
3599 addr
= build1 (ADDR_EXPR
, type
, var
);
3602 for (i
= 0; i
< 2; i
++)
3604 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3607 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3610 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3611 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3613 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3614 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3615 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3616 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3617 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3618 fprintf (dump_file
, "\n");
3622 costs_initialized
= true;
3627 if (SSA_VAR_P (expr
))
3630 if (is_gimple_min_invariant (expr
))
3632 if (TREE_CODE (expr
) == INTEGER_CST
)
3633 return new_cost (integer_cost
[speed
], 0);
3635 if (TREE_CODE (expr
) == ADDR_EXPR
)
3637 tree obj
= TREE_OPERAND (expr
, 0);
3639 if (TREE_CODE (obj
) == VAR_DECL
3640 || TREE_CODE (obj
) == PARM_DECL
3641 || TREE_CODE (obj
) == RESULT_DECL
)
3642 return new_cost (symbol_cost
[speed
], 0);
3645 return new_cost (address_cost
[speed
], 0);
3648 switch (TREE_CODE (expr
))
3650 case POINTER_PLUS_EXPR
:
3654 op0
= TREE_OPERAND (expr
, 0);
3655 op1
= TREE_OPERAND (expr
, 1);
3662 op0
= TREE_OPERAND (expr
, 0);
3668 /* Just an arbitrary value, FIXME. */
3669 return new_cost (target_spill_cost
[speed
], 0);
3672 if (op0
== NULL_TREE
3673 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3676 cost0
= force_expr_to_var_cost (op0
, speed
);
3678 if (op1
== NULL_TREE
3679 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3682 cost1
= force_expr_to_var_cost (op1
, speed
);
3684 mode
= TYPE_MODE (TREE_TYPE (expr
));
3685 switch (TREE_CODE (expr
))
3687 case POINTER_PLUS_EXPR
:
3691 cost
= new_cost (add_cost (speed
, mode
), 0);
3692 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3694 tree mult
= NULL_TREE
;
3696 if (TREE_CODE (op1
) == MULT_EXPR
)
3698 else if (TREE_CODE (op0
) == MULT_EXPR
)
3701 if (mult
!= NULL_TREE
3702 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3703 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3711 tree inner_mode
, outer_mode
;
3712 outer_mode
= TREE_TYPE (expr
);
3713 inner_mode
= TREE_TYPE (op0
);
3714 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3715 TYPE_MODE (inner_mode
), speed
), 0);
3720 if (cst_and_fits_in_hwi (op0
))
3721 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3723 else if (cst_and_fits_in_hwi (op1
))
3724 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3727 return new_cost (target_spill_cost
[speed
], 0);
3734 cost
= add_costs (cost
, cost0
);
3735 cost
= add_costs (cost
, cost1
);
3737 /* Bound the cost by target_spill_cost. The parts of complicated
3738 computations often are either loop invariant or at least can
3739 be shared between several iv uses, so letting this grow without
3740 limits would not give reasonable results. */
3741 if (cost
.cost
> (int) target_spill_cost
[speed
])
3742 cost
.cost
= target_spill_cost
[speed
];
3747 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3748 invariants the computation depends on. */
3751 force_var_cost (struct ivopts_data
*data
,
3752 tree expr
, bitmap
*depends_on
)
3756 fd_ivopts_data
= data
;
3757 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3760 return force_expr_to_var_cost (expr
, data
->speed
);
3763 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3764 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3765 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3766 invariants the computation depends on. */
3769 split_address_cost (struct ivopts_data
*data
,
3770 tree addr
, bool *symbol_present
, bool *var_present
,
3771 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3774 HOST_WIDE_INT bitsize
;
3775 HOST_WIDE_INT bitpos
;
3777 enum machine_mode mode
;
3778 int unsignedp
, volatilep
;
3780 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3781 &unsignedp
, &volatilep
, false);
3784 || bitpos
% BITS_PER_UNIT
!= 0
3785 || TREE_CODE (core
) != VAR_DECL
)
3787 *symbol_present
= false;
3788 *var_present
= true;
3789 fd_ivopts_data
= data
;
3790 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3791 return new_cost (target_spill_cost
[data
->speed
], 0);
3794 *offset
+= bitpos
/ BITS_PER_UNIT
;
3795 if (TREE_STATIC (core
)
3796 || DECL_EXTERNAL (core
))
3798 *symbol_present
= true;
3799 *var_present
= false;
3803 *symbol_present
= false;
3804 *var_present
= true;
3808 /* Estimates cost of expressing difference of addresses E1 - E2 as
3809 var + symbol + offset. The value of offset is added to OFFSET,
3810 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3811 part is missing. DEPENDS_ON is a set of the invariants the computation
3815 ptr_difference_cost (struct ivopts_data
*data
,
3816 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3817 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3819 HOST_WIDE_INT diff
= 0;
3820 aff_tree aff_e1
, aff_e2
;
3823 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3825 if (ptr_difference_const (e1
, e2
, &diff
))
3828 *symbol_present
= false;
3829 *var_present
= false;
3833 if (integer_zerop (e2
))
3834 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3835 symbol_present
, var_present
, offset
, depends_on
);
3837 *symbol_present
= false;
3838 *var_present
= true;
3840 type
= signed_type_for (TREE_TYPE (e1
));
3841 tree_to_aff_combination (e1
, type
, &aff_e1
);
3842 tree_to_aff_combination (e2
, type
, &aff_e2
);
3843 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3844 aff_combination_add (&aff_e1
, &aff_e2
);
3846 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3849 /* Estimates cost of expressing difference E1 - E2 as
3850 var + symbol + offset. The value of offset is added to OFFSET,
3851 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3852 part is missing. DEPENDS_ON is a set of the invariants the computation
3856 difference_cost (struct ivopts_data
*data
,
3857 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3858 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3860 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3861 unsigned HOST_WIDE_INT off1
, off2
;
3862 aff_tree aff_e1
, aff_e2
;
3865 e1
= strip_offset (e1
, &off1
);
3866 e2
= strip_offset (e2
, &off2
);
3867 *offset
+= off1
- off2
;
3872 if (TREE_CODE (e1
) == ADDR_EXPR
)
3873 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3874 offset
, depends_on
);
3875 *symbol_present
= false;
3877 if (operand_equal_p (e1
, e2
, 0))
3879 *var_present
= false;
3883 *var_present
= true;
3885 if (integer_zerop (e2
))
3886 return force_var_cost (data
, e1
, depends_on
);
3888 if (integer_zerop (e1
))
3890 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3891 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3895 type
= signed_type_for (TREE_TYPE (e1
));
3896 tree_to_aff_combination (e1
, type
, &aff_e1
);
3897 tree_to_aff_combination (e2
, type
, &aff_e2
);
3898 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3899 aff_combination_add (&aff_e1
, &aff_e2
);
3901 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3904 /* Returns true if AFF1 and AFF2 are identical. */
3907 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3911 if (aff1
->n
!= aff2
->n
)
3914 for (i
= 0; i
< aff1
->n
; i
++)
3916 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3919 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3925 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3928 get_expr_id (struct ivopts_data
*data
, tree expr
)
3930 struct iv_inv_expr_ent ent
;
3931 struct iv_inv_expr_ent
**slot
;
3934 ent
.hash
= iterative_hash_expr (expr
, 0);
3935 slot
= data
->inv_expr_tab
.find_slot (&ent
, INSERT
);
3939 *slot
= XNEW (struct iv_inv_expr_ent
);
3940 (*slot
)->expr
= expr
;
3941 (*slot
)->hash
= ent
.hash
;
3942 (*slot
)->id
= data
->inv_expr_id
++;
3946 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3947 requires a new compiler generated temporary. Returns -1 otherwise.
3948 ADDRESS_P is a flag indicating if the expression is for address
3952 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3953 tree cbase
, HOST_WIDE_INT ratio
,
3956 aff_tree ubase_aff
, cbase_aff
;
3964 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3965 && (TREE_CODE (cbase
) == INTEGER_CST
))
3968 /* Strips the constant part. */
3969 if (TREE_CODE (ubase
) == PLUS_EXPR
3970 || TREE_CODE (ubase
) == MINUS_EXPR
3971 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3973 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3974 ubase
= TREE_OPERAND (ubase
, 0);
3977 /* Strips the constant part. */
3978 if (TREE_CODE (cbase
) == PLUS_EXPR
3979 || TREE_CODE (cbase
) == MINUS_EXPR
3980 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3982 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3983 cbase
= TREE_OPERAND (cbase
, 0);
3988 if (((TREE_CODE (ubase
) == SSA_NAME
)
3989 || (TREE_CODE (ubase
) == ADDR_EXPR
3990 && is_gimple_min_invariant (ubase
)))
3991 && (TREE_CODE (cbase
) == INTEGER_CST
))
3994 if (((TREE_CODE (cbase
) == SSA_NAME
)
3995 || (TREE_CODE (cbase
) == ADDR_EXPR
3996 && is_gimple_min_invariant (cbase
)))
3997 && (TREE_CODE (ubase
) == INTEGER_CST
))
4003 if (operand_equal_p (ubase
, cbase
, 0))
4006 if (TREE_CODE (ubase
) == ADDR_EXPR
4007 && TREE_CODE (cbase
) == ADDR_EXPR
)
4011 usym
= TREE_OPERAND (ubase
, 0);
4012 csym
= TREE_OPERAND (cbase
, 0);
4013 if (TREE_CODE (usym
) == ARRAY_REF
)
4015 tree ind
= TREE_OPERAND (usym
, 1);
4016 if (TREE_CODE (ind
) == INTEGER_CST
4017 && tree_fits_shwi_p (ind
)
4018 && tree_to_shwi (ind
) == 0)
4019 usym
= TREE_OPERAND (usym
, 0);
4021 if (TREE_CODE (csym
) == ARRAY_REF
)
4023 tree ind
= TREE_OPERAND (csym
, 1);
4024 if (TREE_CODE (ind
) == INTEGER_CST
4025 && tree_fits_shwi_p (ind
)
4026 && tree_to_shwi (ind
) == 0)
4027 csym
= TREE_OPERAND (csym
, 0);
4029 if (operand_equal_p (usym
, csym
, 0))
4032 /* Now do more complex comparison */
4033 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4034 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4035 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4039 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4040 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4042 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
4043 aff_combination_add (&ubase_aff
, &cbase_aff
);
4044 expr
= aff_combination_to_tree (&ubase_aff
);
4045 return get_expr_id (data
, expr
);
4050 /* Determines the cost of the computation by that USE is expressed
4051 from induction variable CAND. If ADDRESS_P is true, we just need
4052 to create an address from it, otherwise we want to get it into
4053 register. A set of invariants we depend on is stored in
4054 DEPENDS_ON. AT is the statement at that the value is computed.
4055 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4056 addressing is likely. */
4059 get_computation_cost_at (struct ivopts_data
*data
,
4060 struct iv_use
*use
, struct iv_cand
*cand
,
4061 bool address_p
, bitmap
*depends_on
, gimple at
,
4065 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4067 tree utype
= TREE_TYPE (ubase
), ctype
;
4068 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4069 HOST_WIDE_INT ratio
, aratio
;
4070 bool var_present
, symbol_present
, stmt_is_after_inc
;
4073 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4074 enum machine_mode mem_mode
= (address_p
4075 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4080 /* Only consider real candidates. */
4082 return infinite_cost
;
4084 cbase
= cand
->iv
->base
;
4085 cstep
= cand
->iv
->step
;
4086 ctype
= TREE_TYPE (cbase
);
4088 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4090 /* We do not have a precision to express the values of use. */
4091 return infinite_cost
;
4095 || (use
->iv
->base_object
4096 && cand
->iv
->base_object
4097 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4098 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4100 /* Do not try to express address of an object with computation based
4101 on address of a different object. This may cause problems in rtl
4102 level alias analysis (that does not expect this to be happening,
4103 as this is illegal in C), and would be unlikely to be useful
4105 if (use
->iv
->base_object
4106 && cand
->iv
->base_object
4107 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4108 return infinite_cost
;
4111 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4113 /* TODO -- add direct handling of this case. */
4117 /* CSTEPI is removed from the offset in case statement is after the
4118 increment. If the step is not constant, we use zero instead.
4119 This is a bit imprecise (there is the extra addition), but
4120 redundancy elimination is likely to transform the code so that
4121 it uses value of the variable before increment anyway,
4122 so it is not that much unrealistic. */
4123 if (cst_and_fits_in_hwi (cstep
))
4124 cstepi
= int_cst_value (cstep
);
4128 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4129 return infinite_cost
;
4131 if (rat
.fits_shwi ())
4132 ratio
= rat
.to_shwi ();
4134 return infinite_cost
;
4137 ctype
= TREE_TYPE (cbase
);
4139 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4141 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4142 or ratio == 1, it is better to handle this like
4144 ubase - ratio * cbase + ratio * var
4146 (also holds in the case ratio == -1, TODO. */
4148 if (cst_and_fits_in_hwi (cbase
))
4150 offset
= - ratio
* int_cst_value (cbase
);
4151 cost
= difference_cost (data
,
4152 ubase
, build_int_cst (utype
, 0),
4153 &symbol_present
, &var_present
, &offset
,
4155 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4157 else if (ratio
== 1)
4159 tree real_cbase
= cbase
;
4161 /* Check to see if any adjustment is needed. */
4162 if (cstepi
== 0 && stmt_is_after_inc
)
4164 aff_tree real_cbase_aff
;
4167 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4169 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4171 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4172 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4175 cost
= difference_cost (data
,
4177 &symbol_present
, &var_present
, &offset
,
4179 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4182 && !POINTER_TYPE_P (ctype
)
4183 && multiplier_allowed_in_address_p
4185 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4188 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4189 cost
= difference_cost (data
,
4191 &symbol_present
, &var_present
, &offset
,
4193 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4197 cost
= force_var_cost (data
, cbase
, depends_on
);
4198 cost
= add_costs (cost
,
4199 difference_cost (data
,
4200 ubase
, build_int_cst (utype
, 0),
4201 &symbol_present
, &var_present
,
4202 &offset
, depends_on
));
4203 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4204 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4210 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4211 /* Clear depends on. */
4212 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4213 bitmap_clear (*depends_on
);
4216 /* If we are after the increment, the value of the candidate is higher by
4218 if (stmt_is_after_inc
)
4219 offset
-= ratio
* cstepi
;
4221 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4222 (symbol/var1/const parts may be omitted). If we are looking for an
4223 address, find the cost of addressing this. */
4225 return add_costs (cost
,
4226 get_address_cost (symbol_present
, var_present
,
4227 offset
, ratio
, cstepi
,
4229 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4230 speed
, stmt_is_after_inc
,
4233 /* Otherwise estimate the costs for computing the expression. */
4234 if (!symbol_present
&& !var_present
&& !offset
)
4237 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4241 /* Symbol + offset should be compile-time computable so consider that they
4242 are added once to the variable, if present. */
4243 if (var_present
&& (symbol_present
|| offset
))
4244 cost
.cost
+= adjust_setup_cost (data
,
4245 add_cost (speed
, TYPE_MODE (ctype
)));
4247 /* Having offset does not affect runtime cost in case it is added to
4248 symbol, but it increases complexity. */
4252 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4254 aratio
= ratio
> 0 ? ratio
: -ratio
;
4256 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4261 *can_autoinc
= false;
4264 /* Just get the expression, expand it and measure the cost. */
4265 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4268 return infinite_cost
;
4271 comp
= build_simple_mem_ref (comp
);
4273 return new_cost (computation_cost (comp
, speed
), 0);
4277 /* Determines the cost of the computation by that USE is expressed
4278 from induction variable CAND. If ADDRESS_P is true, we just need
4279 to create an address from it, otherwise we want to get it into
4280 register. A set of invariants we depend on is stored in
4281 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4282 autoinc addressing is likely. */
4285 get_computation_cost (struct ivopts_data
*data
,
4286 struct iv_use
*use
, struct iv_cand
*cand
,
4287 bool address_p
, bitmap
*depends_on
,
4288 bool *can_autoinc
, int *inv_expr_id
)
4290 return get_computation_cost_at (data
,
4291 use
, cand
, address_p
, depends_on
, use
->stmt
,
4292 can_autoinc
, inv_expr_id
);
4295 /* Determines cost of basing replacement of USE on CAND in a generic
4299 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4300 struct iv_use
*use
, struct iv_cand
*cand
)
4304 int inv_expr_id
= -1;
4306 /* The simple case first -- if we need to express value of the preserved
4307 original biv, the cost is 0. This also prevents us from counting the
4308 cost of increment twice -- once at this use and once in the cost of
4310 if (cand
->pos
== IP_ORIGINAL
4311 && cand
->incremented_at
== use
->stmt
)
4313 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4318 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4319 NULL
, &inv_expr_id
);
4321 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4324 return !infinite_cost_p (cost
);
4327 /* Determines cost of basing replacement of USE on CAND in an address. */
4330 determine_use_iv_cost_address (struct ivopts_data
*data
,
4331 struct iv_use
*use
, struct iv_cand
*cand
)
4335 int inv_expr_id
= -1;
4336 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4337 &can_autoinc
, &inv_expr_id
);
4339 if (cand
->ainc_use
== use
)
4342 cost
.cost
-= cand
->cost_step
;
4343 /* If we generated the candidate solely for exploiting autoincrement
4344 opportunities, and it turns out it can't be used, set the cost to
4345 infinity to make sure we ignore it. */
4346 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4347 cost
= infinite_cost
;
4349 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4352 return !infinite_cost_p (cost
);
4355 /* Computes value of candidate CAND at position AT in iteration NITER, and
4356 stores it to VAL. */
4359 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4362 aff_tree step
, delta
, nit
;
4363 struct iv
*iv
= cand
->iv
;
4364 tree type
= TREE_TYPE (iv
->base
);
4365 tree steptype
= type
;
4366 if (POINTER_TYPE_P (type
))
4367 steptype
= sizetype
;
4368 steptype
= unsigned_type_for (type
);
4370 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4371 aff_combination_convert (&step
, steptype
);
4372 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4373 aff_combination_convert (&nit
, steptype
);
4374 aff_combination_mult (&nit
, &step
, &delta
);
4375 if (stmt_after_increment (loop
, cand
, at
))
4376 aff_combination_add (&delta
, &step
);
4378 tree_to_aff_combination (iv
->base
, type
, val
);
4379 if (!POINTER_TYPE_P (type
))
4380 aff_combination_convert (val
, steptype
);
4381 aff_combination_add (val
, &delta
);
4384 /* Returns period of induction variable iv. */
4387 iv_period (struct iv
*iv
)
4389 tree step
= iv
->step
, period
, type
;
4392 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4394 type
= unsigned_type_for (TREE_TYPE (step
));
4395 /* Period of the iv is lcm (step, type_range)/step -1,
4396 i.e., N*type_range/step - 1. Since type range is power
4397 of two, N == (step >> num_of_ending_zeros_binary (step),
4398 so the final result is
4400 (type_range >> num_of_ending_zeros_binary (step)) - 1
4403 pow2div
= num_ending_zeros (step
);
4405 period
= build_low_bits_mask (type
,
4406 (TYPE_PRECISION (type
)
4407 - tree_to_uhwi (pow2div
)));
4412 /* Returns the comparison operator used when eliminating the iv USE. */
4414 static enum tree_code
4415 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4417 struct loop
*loop
= data
->current_loop
;
4421 ex_bb
= gimple_bb (use
->stmt
);
4422 exit
= EDGE_SUCC (ex_bb
, 0);
4423 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4424 exit
= EDGE_SUCC (ex_bb
, 1);
4426 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4430 strip_wrap_conserving_type_conversions (tree exp
)
4432 while (tree_ssa_useless_type_conversion (exp
)
4433 && (nowrap_type_p (TREE_TYPE (exp
))
4434 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4435 exp
= TREE_OPERAND (exp
, 0);
4439 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4440 check for an exact match. */
4443 expr_equal_p (tree e
, tree what
)
4446 enum tree_code code
;
4448 e
= strip_wrap_conserving_type_conversions (e
);
4449 what
= strip_wrap_conserving_type_conversions (what
);
4451 code
= TREE_CODE (what
);
4452 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4455 if (operand_equal_p (e
, what
, 0))
4458 if (TREE_CODE (e
) != SSA_NAME
)
4461 stmt
= SSA_NAME_DEF_STMT (e
);
4462 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4463 || gimple_assign_rhs_code (stmt
) != code
)
4466 switch (get_gimple_rhs_class (code
))
4468 case GIMPLE_BINARY_RHS
:
4469 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4473 case GIMPLE_UNARY_RHS
:
4474 case GIMPLE_SINGLE_RHS
:
4475 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4481 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4482 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4483 calculation is performed in non-wrapping type.
4485 TODO: More generally, we could test for the situation that
4486 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4487 This would require knowing the sign of OFFSET.
4489 Also, we only look for the first addition in the computation of BASE.
4490 More complex analysis would be better, but introducing it just for
4491 this optimization seems like an overkill. */
4494 difference_cannot_overflow_p (tree base
, tree offset
)
4496 enum tree_code code
;
4499 if (!nowrap_type_p (TREE_TYPE (base
)))
4502 base
= expand_simple_operations (base
);
4504 if (TREE_CODE (base
) == SSA_NAME
)
4506 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4508 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4511 code
= gimple_assign_rhs_code (stmt
);
4512 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4515 e1
= gimple_assign_rhs1 (stmt
);
4516 e2
= gimple_assign_rhs2 (stmt
);
4520 code
= TREE_CODE (base
);
4521 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4523 e1
= TREE_OPERAND (base
, 0);
4524 e2
= TREE_OPERAND (base
, 1);
4527 /* TODO: deeper inspection may be necessary to prove the equality. */
4531 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4532 case POINTER_PLUS_EXPR
:
4533 return expr_equal_p (e2
, offset
);
4540 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4541 comparison with CAND. NITER describes the number of iterations of
4542 the loops. If successful, the comparison in COMP_P is altered accordingly.
4544 We aim to handle the following situation:
4560 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4561 We aim to optimize this to
4569 while (p < p_0 - a + b);
4571 This preserves the correctness, since the pointer arithmetics does not
4572 overflow. More precisely:
4574 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4575 overflow in computing it or the values of p.
4576 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4577 overflow. To prove this, we use the fact that p_0 = base + a. */
4580 iv_elimination_compare_lt (struct ivopts_data
*data
,
4581 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4582 struct tree_niter_desc
*niter
)
4584 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4585 struct aff_tree nit
, tmpa
, tmpb
;
4586 enum tree_code comp
;
4589 /* We need to know that the candidate induction variable does not overflow.
4590 While more complex analysis may be used to prove this, for now just
4591 check that the variable appears in the original program and that it
4592 is computed in a type that guarantees no overflows. */
4593 cand_type
= TREE_TYPE (cand
->iv
->base
);
4594 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4597 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4598 the calculation of the BOUND could overflow, making the comparison
4600 if (!data
->loop_single_exit_p
)
4603 /* We need to be able to decide whether candidate is increasing or decreasing
4604 in order to choose the right comparison operator. */
4605 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4607 step
= int_cst_value (cand
->iv
->step
);
4609 /* Check that the number of iterations matches the expected pattern:
4610 a + 1 > b ? 0 : b - a - 1. */
4611 mbz
= niter
->may_be_zero
;
4612 if (TREE_CODE (mbz
) == GT_EXPR
)
4614 /* Handle a + 1 > b. */
4615 tree op0
= TREE_OPERAND (mbz
, 0);
4616 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4618 a
= TREE_OPERAND (op0
, 0);
4619 b
= TREE_OPERAND (mbz
, 1);
4624 else if (TREE_CODE (mbz
) == LT_EXPR
)
4626 tree op1
= TREE_OPERAND (mbz
, 1);
4628 /* Handle b < a + 1. */
4629 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4631 a
= TREE_OPERAND (op1
, 0);
4632 b
= TREE_OPERAND (mbz
, 0);
4640 /* Expected number of iterations is B - A - 1. Check that it matches
4641 the actual number, i.e., that B - A - NITER = 1. */
4642 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4643 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4644 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4645 aff_combination_scale (&nit
, double_int_minus_one
);
4646 aff_combination_scale (&tmpa
, double_int_minus_one
);
4647 aff_combination_add (&tmpb
, &tmpa
);
4648 aff_combination_add (&tmpb
, &nit
);
4649 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4652 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4654 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4656 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4657 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4660 /* Determine the new comparison operator. */
4661 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4662 if (*comp_p
== NE_EXPR
)
4664 else if (*comp_p
== EQ_EXPR
)
4665 *comp_p
= invert_tree_comparison (comp
, false);
4672 /* Check whether it is possible to express the condition in USE by comparison
4673 of candidate CAND. If so, store the value compared with to BOUND, and the
4674 comparison operator to COMP. */
4677 may_eliminate_iv (struct ivopts_data
*data
,
4678 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4679 enum tree_code
*comp
)
4684 struct loop
*loop
= data
->current_loop
;
4686 struct tree_niter_desc
*desc
= NULL
;
4688 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4691 /* For now works only for exits that dominate the loop latch.
4692 TODO: extend to other conditions inside loop body. */
4693 ex_bb
= gimple_bb (use
->stmt
);
4694 if (use
->stmt
!= last_stmt (ex_bb
)
4695 || gimple_code (use
->stmt
) != GIMPLE_COND
4696 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4699 exit
= EDGE_SUCC (ex_bb
, 0);
4700 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4701 exit
= EDGE_SUCC (ex_bb
, 1);
4702 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4705 desc
= niter_for_exit (data
, exit
);
4709 /* Determine whether we can use the variable to test the exit condition.
4710 This is the case iff the period of the induction variable is greater
4711 than the number of iterations for which the exit condition is true. */
4712 period
= iv_period (cand
->iv
);
4714 /* If the number of iterations is constant, compare against it directly. */
4715 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4717 /* See cand_value_at. */
4718 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4720 if (!tree_int_cst_lt (desc
->niter
, period
))
4725 if (tree_int_cst_lt (period
, desc
->niter
))
4730 /* If not, and if this is the only possible exit of the loop, see whether
4731 we can get a conservative estimate on the number of iterations of the
4732 entire loop and compare against that instead. */
4735 double_int period_value
, max_niter
;
4737 max_niter
= desc
->max
;
4738 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4739 max_niter
+= double_int_one
;
4740 period_value
= tree_to_double_int (period
);
4741 if (max_niter
.ugt (period_value
))
4743 /* See if we can take advantage of inferred loop bound information. */
4744 if (data
->loop_single_exit_p
)
4746 if (!max_loop_iterations (loop
, &max_niter
))
4748 /* The loop bound is already adjusted by adding 1. */
4749 if (max_niter
.ugt (period_value
))
4757 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4759 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4760 aff_combination_to_tree (&bnd
));
4761 *comp
= iv_elimination_compare (data
, use
);
4763 /* It is unlikely that computing the number of iterations using division
4764 would be more profitable than keeping the original induction variable. */
4765 if (expression_expensive_p (*bound
))
4768 /* Sometimes, it is possible to handle the situation that the number of
4769 iterations may be zero unless additional assumtions by using <
4770 instead of != in the exit condition.
4772 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4773 base the exit condition on it. However, that is often too
4775 if (!integer_zerop (desc
->may_be_zero
))
4776 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4781 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4782 be copied, if is is used in the loop body and DATA->body_includes_call. */
4785 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4787 tree sbound
= bound
;
4788 STRIP_NOPS (sbound
);
4790 if (TREE_CODE (sbound
) == SSA_NAME
4791 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4792 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4793 && data
->body_includes_call
)
4794 return COSTS_N_INSNS (1);
4799 /* Determines cost of basing replacement of USE on CAND in a condition. */
4802 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4803 struct iv_use
*use
, struct iv_cand
*cand
)
4805 tree bound
= NULL_TREE
;
4807 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4808 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4810 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4811 tree
*control_var
, *bound_cst
;
4812 enum tree_code comp
= ERROR_MARK
;
4814 /* Only consider real candidates. */
4817 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4822 /* Try iv elimination. */
4823 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4825 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4826 if (elim_cost
.cost
== 0)
4827 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4828 else if (TREE_CODE (bound
) == INTEGER_CST
)
4830 /* If we replace a loop condition 'i < n' with 'p < base + n',
4831 depends_on_elim will have 'base' and 'n' set, which implies
4832 that both 'base' and 'n' will be live during the loop. More likely,
4833 'base + n' will be loop invariant, resulting in only one live value
4834 during the loop. So in that case we clear depends_on_elim and set
4835 elim_inv_expr_id instead. */
4836 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4838 elim_inv_expr_id
= get_expr_id (data
, bound
);
4839 bitmap_clear (depends_on_elim
);
4841 /* The bound is a loop invariant, so it will be only computed
4843 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4846 elim_cost
= infinite_cost
;
4848 /* Try expressing the original giv. If it is compared with an invariant,
4849 note that we cannot get rid of it. */
4850 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4854 /* When the condition is a comparison of the candidate IV against
4855 zero, prefer this IV.
4857 TODO: The constant that we're subtracting from the cost should
4858 be target-dependent. This information should be added to the
4859 target costs for each backend. */
4860 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4861 && integer_zerop (*bound_cst
)
4862 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4863 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4864 elim_cost
.cost
-= 1;
4866 express_cost
= get_computation_cost (data
, use
, cand
, false,
4867 &depends_on_express
, NULL
,
4868 &express_inv_expr_id
);
4869 fd_ivopts_data
= data
;
4870 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4872 /* Count the cost of the original bound as well. */
4873 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4874 if (bound_cost
.cost
== 0)
4875 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4876 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4877 bound_cost
.cost
= 0;
4878 express_cost
.cost
+= bound_cost
.cost
;
4880 /* Choose the better approach, preferring the eliminated IV. */
4881 if (compare_costs (elim_cost
, express_cost
) <= 0)
4884 depends_on
= depends_on_elim
;
4885 depends_on_elim
= NULL
;
4886 inv_expr_id
= elim_inv_expr_id
;
4890 cost
= express_cost
;
4891 depends_on
= depends_on_express
;
4892 depends_on_express
= NULL
;
4895 inv_expr_id
= express_inv_expr_id
;
4898 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4900 if (depends_on_elim
)
4901 BITMAP_FREE (depends_on_elim
);
4902 if (depends_on_express
)
4903 BITMAP_FREE (depends_on_express
);
4905 return !infinite_cost_p (cost
);
4908 /* Determines cost of basing replacement of USE on CAND. Returns false
4909 if USE cannot be based on CAND. */
4912 determine_use_iv_cost (struct ivopts_data
*data
,
4913 struct iv_use
*use
, struct iv_cand
*cand
)
4917 case USE_NONLINEAR_EXPR
:
4918 return determine_use_iv_cost_generic (data
, use
, cand
);
4921 return determine_use_iv_cost_address (data
, use
, cand
);
4924 return determine_use_iv_cost_condition (data
, use
, cand
);
4931 /* Return true if get_computation_cost indicates that autoincrement is
4932 a possibility for the pair of USE and CAND, false otherwise. */
4935 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4936 struct iv_cand
*cand
)
4942 if (use
->type
!= USE_ADDRESS
)
4945 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4946 &can_autoinc
, NULL
);
4948 BITMAP_FREE (depends_on
);
4950 return !infinite_cost_p (cost
) && can_autoinc
;
4953 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4954 use that allows autoincrement, and set their AINC_USE if possible. */
4957 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4961 for (i
= 0; i
< n_iv_cands (data
); i
++)
4963 struct iv_cand
*cand
= iv_cand (data
, i
);
4964 struct iv_use
*closest_before
= NULL
;
4965 struct iv_use
*closest_after
= NULL
;
4966 if (cand
->pos
!= IP_ORIGINAL
)
4969 for (j
= 0; j
< n_iv_uses (data
); j
++)
4971 struct iv_use
*use
= iv_use (data
, j
);
4972 unsigned uid
= gimple_uid (use
->stmt
);
4974 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4977 if (uid
< gimple_uid (cand
->incremented_at
)
4978 && (closest_before
== NULL
4979 || uid
> gimple_uid (closest_before
->stmt
)))
4980 closest_before
= use
;
4982 if (uid
> gimple_uid (cand
->incremented_at
)
4983 && (closest_after
== NULL
4984 || uid
< gimple_uid (closest_after
->stmt
)))
4985 closest_after
= use
;
4988 if (closest_before
!= NULL
4989 && autoinc_possible_for_pair (data
, closest_before
, cand
))
4990 cand
->ainc_use
= closest_before
;
4991 else if (closest_after
!= NULL
4992 && autoinc_possible_for_pair (data
, closest_after
, cand
))
4993 cand
->ainc_use
= closest_after
;
4997 /* Finds the candidates for the induction variables. */
5000 find_iv_candidates (struct ivopts_data
*data
)
5002 /* Add commonly used ivs. */
5003 add_standard_iv_candidates (data
);
5005 /* Add old induction variables. */
5006 add_old_ivs_candidates (data
);
5008 /* Add induction variables derived from uses. */
5009 add_derived_ivs_candidates (data
);
5011 set_autoinc_for_original_candidates (data
);
5013 /* Record the important candidates. */
5014 record_important_candidates (data
);
5017 /* Determines costs of basing the use of the iv on an iv candidate. */
5020 determine_use_iv_costs (struct ivopts_data
*data
)
5024 struct iv_cand
*cand
;
5025 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5027 alloc_use_cost_map (data
);
5029 for (i
= 0; i
< n_iv_uses (data
); i
++)
5031 use
= iv_use (data
, i
);
5033 if (data
->consider_all_candidates
)
5035 for (j
= 0; j
< n_iv_cands (data
); j
++)
5037 cand
= iv_cand (data
, j
);
5038 determine_use_iv_cost (data
, use
, cand
);
5045 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5047 cand
= iv_cand (data
, j
);
5048 if (!determine_use_iv_cost (data
, use
, cand
))
5049 bitmap_set_bit (to_clear
, j
);
5052 /* Remove the candidates for that the cost is infinite from
5053 the list of related candidates. */
5054 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5055 bitmap_clear (to_clear
);
5059 BITMAP_FREE (to_clear
);
5061 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5063 fprintf (dump_file
, "Use-candidate costs:\n");
5065 for (i
= 0; i
< n_iv_uses (data
); i
++)
5067 use
= iv_use (data
, i
);
5069 fprintf (dump_file
, "Use %d:\n", i
);
5070 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5071 for (j
= 0; j
< use
->n_map_members
; j
++)
5073 if (!use
->cost_map
[j
].cand
5074 || infinite_cost_p (use
->cost_map
[j
].cost
))
5077 fprintf (dump_file
, " %d\t%d\t%d\t",
5078 use
->cost_map
[j
].cand
->id
,
5079 use
->cost_map
[j
].cost
.cost
,
5080 use
->cost_map
[j
].cost
.complexity
);
5081 if (use
->cost_map
[j
].depends_on
)
5082 bitmap_print (dump_file
,
5083 use
->cost_map
[j
].depends_on
, "","");
5084 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5085 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5086 fprintf (dump_file
, "\n");
5089 fprintf (dump_file
, "\n");
5091 fprintf (dump_file
, "\n");
5095 /* Determines cost of the candidate CAND. */
5098 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5100 comp_cost cost_base
;
5101 unsigned cost
, cost_step
;
5110 /* There are two costs associated with the candidate -- its increment
5111 and its initialization. The second is almost negligible for any loop
5112 that rolls enough, so we take it just very little into account. */
5114 base
= cand
->iv
->base
;
5115 cost_base
= force_var_cost (data
, base
, NULL
);
5116 /* It will be exceptional that the iv register happens to be initialized with
5117 the proper value at no cost. In general, there will at least be a regcopy
5119 if (cost_base
.cost
== 0)
5120 cost_base
.cost
= COSTS_N_INSNS (1);
5121 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5123 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5125 /* Prefer the original ivs unless we may gain something by replacing it.
5126 The reason is to make debugging simpler; so this is not relevant for
5127 artificial ivs created by other optimization passes. */
5128 if (cand
->pos
!= IP_ORIGINAL
5129 || !SSA_NAME_VAR (cand
->var_before
)
5130 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5133 /* Prefer not to insert statements into latch unless there are some
5134 already (so that we do not create unnecessary jumps). */
5135 if (cand
->pos
== IP_END
5136 && empty_block_p (ip_end_pos (data
->current_loop
)))
5140 cand
->cost_step
= cost_step
;
5143 /* Determines costs of computation of the candidates. */
5146 determine_iv_costs (struct ivopts_data
*data
)
5150 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5152 fprintf (dump_file
, "Candidate costs:\n");
5153 fprintf (dump_file
, " cand\tcost\n");
5156 for (i
= 0; i
< n_iv_cands (data
); i
++)
5158 struct iv_cand
*cand
= iv_cand (data
, i
);
5160 determine_iv_cost (data
, cand
);
5162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5163 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5166 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5167 fprintf (dump_file
, "\n");
5170 /* Calculates cost for having SIZE induction variables. */
5173 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5175 /* We add size to the cost, so that we prefer eliminating ivs
5177 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5178 data
->body_includes_call
);
5181 /* For each size of the induction variable set determine the penalty. */
5184 determine_set_costs (struct ivopts_data
*data
)
5188 gimple_stmt_iterator psi
;
5190 struct loop
*loop
= data
->current_loop
;
5193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5195 fprintf (dump_file
, "Global costs:\n");
5196 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5197 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5198 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5199 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5203 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5205 phi
= gsi_stmt (psi
);
5206 op
= PHI_RESULT (phi
);
5208 if (virtual_operand_p (op
))
5211 if (get_iv (data
, op
))
5217 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5219 struct version_info
*info
= ver_info (data
, j
);
5221 if (info
->inv_id
&& info
->has_nonlin_use
)
5225 data
->regs_used
= n
;
5226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5227 fprintf (dump_file
, " regs_used %d\n", n
);
5229 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5231 fprintf (dump_file
, " cost for size:\n");
5232 fprintf (dump_file
, " ivs\tcost\n");
5233 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5234 fprintf (dump_file
, " %d\t%d\n", j
,
5235 ivopts_global_cost_for_size (data
, j
));
5236 fprintf (dump_file
, "\n");
5240 /* Returns true if A is a cheaper cost pair than B. */
5243 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5253 cmp
= compare_costs (a
->cost
, b
->cost
);
5260 /* In case the costs are the same, prefer the cheaper candidate. */
5261 if (a
->cand
->cost
< b
->cand
->cost
)
5268 /* Returns candidate by that USE is expressed in IVS. */
5270 static struct cost_pair
*
5271 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5273 return ivs
->cand_for_use
[use
->id
];
5276 /* Computes the cost field of IVS structure. */
5279 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5281 comp_cost cost
= ivs
->cand_use_cost
;
5283 cost
.cost
+= ivs
->cand_cost
;
5285 cost
.cost
+= ivopts_global_cost_for_size (data
,
5286 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5291 /* Remove invariants in set INVS to set IVS. */
5294 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5302 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5304 ivs
->n_invariant_uses
[iid
]--;
5305 if (ivs
->n_invariant_uses
[iid
] == 0)
5310 /* Set USE not to be expressed by any candidate in IVS. */
5313 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5316 unsigned uid
= use
->id
, cid
;
5317 struct cost_pair
*cp
;
5319 cp
= ivs
->cand_for_use
[uid
];
5325 ivs
->cand_for_use
[uid
] = NULL
;
5326 ivs
->n_cand_uses
[cid
]--;
5328 if (ivs
->n_cand_uses
[cid
] == 0)
5330 bitmap_clear_bit (ivs
->cands
, cid
);
5331 /* Do not count the pseudocandidates. */
5335 ivs
->cand_cost
-= cp
->cand
->cost
;
5337 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5340 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5342 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5344 if (cp
->inv_expr_id
!= -1)
5346 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5347 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5348 ivs
->num_used_inv_expr
--;
5350 iv_ca_recount_cost (data
, ivs
);
5353 /* Add invariants in set INVS to set IVS. */
5356 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5364 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5366 ivs
->n_invariant_uses
[iid
]++;
5367 if (ivs
->n_invariant_uses
[iid
] == 1)
5372 /* Set cost pair for USE in set IVS to CP. */
5375 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5376 struct iv_use
*use
, struct cost_pair
*cp
)
5378 unsigned uid
= use
->id
, cid
;
5380 if (ivs
->cand_for_use
[uid
] == cp
)
5383 if (ivs
->cand_for_use
[uid
])
5384 iv_ca_set_no_cp (data
, ivs
, use
);
5391 ivs
->cand_for_use
[uid
] = cp
;
5392 ivs
->n_cand_uses
[cid
]++;
5393 if (ivs
->n_cand_uses
[cid
] == 1)
5395 bitmap_set_bit (ivs
->cands
, cid
);
5396 /* Do not count the pseudocandidates. */
5400 ivs
->cand_cost
+= cp
->cand
->cost
;
5402 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5405 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5406 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5408 if (cp
->inv_expr_id
!= -1)
5410 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5411 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5412 ivs
->num_used_inv_expr
++;
5414 iv_ca_recount_cost (data
, ivs
);
5418 /* Extend set IVS by expressing USE by some of the candidates in it
5419 if possible. All important candidates will be considered
5420 if IMPORTANT_CANDIDATES is true. */
5423 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5424 struct iv_use
*use
, bool important_candidates
)
5426 struct cost_pair
*best_cp
= NULL
, *cp
;
5431 gcc_assert (ivs
->upto
>= use
->id
);
5433 if (ivs
->upto
== use
->id
)
5439 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5440 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5442 struct iv_cand
*cand
= iv_cand (data
, i
);
5444 cp
= get_use_iv_cost (data
, use
, cand
);
5446 if (cheaper_cost_pair (cp
, best_cp
))
5450 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5453 /* Get cost for assignment IVS. */
5456 iv_ca_cost (struct iv_ca
*ivs
)
5458 /* This was a conditional expression but it triggered a bug in
5461 return infinite_cost
;
5466 /* Returns true if all dependences of CP are among invariants in IVS. */
5469 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5474 if (!cp
->depends_on
)
5477 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5479 if (ivs
->n_invariant_uses
[i
] == 0)
5486 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5487 it before NEXT_CHANGE. */
5489 static struct iv_ca_delta
*
5490 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5491 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5493 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5496 change
->old_cp
= old_cp
;
5497 change
->new_cp
= new_cp
;
5498 change
->next_change
= next_change
;
5503 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5506 static struct iv_ca_delta
*
5507 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5509 struct iv_ca_delta
*last
;
5517 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5519 last
->next_change
= l2
;
5524 /* Reverse the list of changes DELTA, forming the inverse to it. */
5526 static struct iv_ca_delta
*
5527 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5529 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5530 struct cost_pair
*tmp
;
5532 for (act
= delta
; act
; act
= next
)
5534 next
= act
->next_change
;
5535 act
->next_change
= prev
;
5539 act
->old_cp
= act
->new_cp
;
5546 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5547 reverted instead. */
5550 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5551 struct iv_ca_delta
*delta
, bool forward
)
5553 struct cost_pair
*from
, *to
;
5554 struct iv_ca_delta
*act
;
5557 delta
= iv_ca_delta_reverse (delta
);
5559 for (act
= delta
; act
; act
= act
->next_change
)
5563 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5564 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5568 iv_ca_delta_reverse (delta
);
5571 /* Returns true if CAND is used in IVS. */
5574 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5576 return ivs
->n_cand_uses
[cand
->id
] > 0;
5579 /* Returns number of induction variable candidates in the set IVS. */
5582 iv_ca_n_cands (struct iv_ca
*ivs
)
5584 return ivs
->n_cands
;
5587 /* Free the list of changes DELTA. */
5590 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5592 struct iv_ca_delta
*act
, *next
;
5594 for (act
= *delta
; act
; act
= next
)
5596 next
= act
->next_change
;
5603 /* Allocates new iv candidates assignment. */
5605 static struct iv_ca
*
5606 iv_ca_new (struct ivopts_data
*data
)
5608 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5612 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5613 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5614 nw
->cands
= BITMAP_ALLOC (NULL
);
5617 nw
->cand_use_cost
= no_cost
;
5619 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5621 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5622 nw
->num_used_inv_expr
= 0;
5627 /* Free memory occupied by the set IVS. */
5630 iv_ca_free (struct iv_ca
**ivs
)
5632 free ((*ivs
)->cand_for_use
);
5633 free ((*ivs
)->n_cand_uses
);
5634 BITMAP_FREE ((*ivs
)->cands
);
5635 free ((*ivs
)->n_invariant_uses
);
5636 free ((*ivs
)->used_inv_expr
);
5641 /* Dumps IVS to FILE. */
5644 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5646 const char *pref
= " invariants ";
5648 comp_cost cost
= iv_ca_cost (ivs
);
5650 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5651 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5652 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5653 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5655 for (i
= 0; i
< ivs
->upto
; i
++)
5657 struct iv_use
*use
= iv_use (data
, i
);
5658 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5660 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5661 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5663 fprintf (file
, " use:%d --> ??\n", use
->id
);
5666 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5667 if (ivs
->n_invariant_uses
[i
])
5669 fprintf (file
, "%s%d", pref
, i
);
5672 fprintf (file
, "\n\n");
5675 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5676 new set, and store differences in DELTA. Number of induction variables
5677 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5678 the function will try to find a solution with mimimal iv candidates. */
5681 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5682 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5683 unsigned *n_ivs
, bool min_ncand
)
5688 struct cost_pair
*old_cp
, *new_cp
;
5691 for (i
= 0; i
< ivs
->upto
; i
++)
5693 use
= iv_use (data
, i
);
5694 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5697 && old_cp
->cand
== cand
)
5700 new_cp
= get_use_iv_cost (data
, use
, cand
);
5704 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5707 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5710 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5713 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5714 cost
= iv_ca_cost (ivs
);
5716 *n_ivs
= iv_ca_n_cands (ivs
);
5717 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5722 /* Try narrowing set IVS by removing CAND. Return the cost of
5723 the new set and store the differences in DELTA. START is
5724 the candidate with which we start narrowing. */
5727 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5728 struct iv_cand
*cand
, struct iv_cand
*start
,
5729 struct iv_ca_delta
**delta
)
5733 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5735 struct iv_cand
*cnd
;
5736 comp_cost cost
, best_cost
, acost
;
5739 for (i
= 0; i
< n_iv_uses (data
); i
++)
5741 use
= iv_use (data
, i
);
5743 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5744 if (old_cp
->cand
!= cand
)
5747 best_cost
= iv_ca_cost (ivs
);
5748 /* Start narrowing with START. */
5749 new_cp
= get_use_iv_cost (data
, use
, start
);
5751 if (data
->consider_all_candidates
)
5753 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5755 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5758 cnd
= iv_cand (data
, ci
);
5760 cp
= get_use_iv_cost (data
, use
, cnd
);
5764 iv_ca_set_cp (data
, ivs
, use
, cp
);
5765 acost
= iv_ca_cost (ivs
);
5767 if (compare_costs (acost
, best_cost
) < 0)
5776 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5778 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5781 cnd
= iv_cand (data
, ci
);
5783 cp
= get_use_iv_cost (data
, use
, cnd
);
5787 iv_ca_set_cp (data
, ivs
, use
, cp
);
5788 acost
= iv_ca_cost (ivs
);
5790 if (compare_costs (acost
, best_cost
) < 0)
5797 /* Restore to old cp for use. */
5798 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5802 iv_ca_delta_free (delta
);
5803 return infinite_cost
;
5806 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5809 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5810 cost
= iv_ca_cost (ivs
);
5811 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5816 /* Try optimizing the set of candidates IVS by removing candidates different
5817 from to EXCEPT_CAND from it. Return cost of the new set, and store
5818 differences in DELTA. */
5821 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5822 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5825 struct iv_ca_delta
*act_delta
, *best_delta
;
5827 comp_cost best_cost
, acost
;
5828 struct iv_cand
*cand
;
5831 best_cost
= iv_ca_cost (ivs
);
5833 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5835 cand
= iv_cand (data
, i
);
5837 if (cand
== except_cand
)
5840 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5842 if (compare_costs (acost
, best_cost
) < 0)
5845 iv_ca_delta_free (&best_delta
);
5846 best_delta
= act_delta
;
5849 iv_ca_delta_free (&act_delta
);
5858 /* Recurse to possibly remove other unnecessary ivs. */
5859 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5860 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5861 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5862 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5866 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5867 cheaper local cost for USE than BEST_CP. Return pointer to
5868 the corresponding cost_pair, otherwise just return BEST_CP. */
5870 static struct cost_pair
*
5871 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
5872 unsigned int cand_idx
, struct iv_cand
*old_cand
,
5873 struct cost_pair
*best_cp
)
5875 struct iv_cand
*cand
;
5876 struct cost_pair
*cp
;
5878 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
5879 if (cand_idx
== old_cand
->id
)
5882 cand
= iv_cand (data
, cand_idx
);
5883 cp
= get_use_iv_cost (data
, use
, cand
);
5884 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
5890 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5891 which are used by more than one iv uses. For each of those candidates,
5892 this function tries to represent iv uses under that candidate using
5893 other ones with lower local cost, then tries to prune the new set.
5894 If the new set has lower cost, It returns the new cost after recording
5895 candidate replacement in list DELTA. */
5898 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5899 struct iv_ca_delta
**delta
)
5901 bitmap_iterator bi
, bj
;
5902 unsigned int i
, j
, k
;
5904 struct iv_cand
*cand
;
5905 comp_cost orig_cost
, acost
;
5906 struct iv_ca_delta
*act_delta
, *tmp_delta
;
5907 struct cost_pair
*old_cp
, *best_cp
= NULL
;
5910 orig_cost
= iv_ca_cost (ivs
);
5912 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5914 if (ivs
->n_cand_uses
[i
] == 1
5915 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
5918 cand
= iv_cand (data
, i
);
5921 /* Represent uses under current candidate using other ones with
5922 lower local cost. */
5923 for (j
= 0; j
< ivs
->upto
; j
++)
5925 use
= iv_use (data
, j
);
5926 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5928 if (old_cp
->cand
!= cand
)
5932 if (data
->consider_all_candidates
)
5933 for (k
= 0; k
< n_iv_cands (data
); k
++)
5934 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5935 old_cp
->cand
, best_cp
);
5937 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
5938 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5939 old_cp
->cand
, best_cp
);
5941 if (best_cp
== old_cp
)
5944 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
5946 /* No need for further prune. */
5950 /* Prune the new candidate set. */
5951 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5952 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
5953 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5954 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5956 if (compare_costs (acost
, orig_cost
) < 0)
5962 iv_ca_delta_free (&act_delta
);
5968 /* Tries to extend the sets IVS in the best possible way in order
5969 to express the USE. If ORIGINALP is true, prefer candidates from
5970 the original set of IVs, otherwise favor important candidates not
5971 based on any memory object. */
5974 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5975 struct iv_use
*use
, bool originalp
)
5977 comp_cost best_cost
, act_cost
;
5980 struct iv_cand
*cand
;
5981 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5982 struct cost_pair
*cp
;
5984 iv_ca_add_use (data
, ivs
, use
, false);
5985 best_cost
= iv_ca_cost (ivs
);
5987 cp
= iv_ca_cand_for_use (ivs
, use
);
5992 iv_ca_add_use (data
, ivs
, use
, true);
5993 best_cost
= iv_ca_cost (ivs
);
5994 cp
= iv_ca_cand_for_use (ivs
, use
);
5998 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5999 iv_ca_set_no_cp (data
, ivs
, use
);
6002 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6003 first try important candidates not based on any memory object. Only if
6004 this fails, try the specific ones. Rationale -- in loops with many
6005 variables the best choice often is to use just one generic biv. If we
6006 added here many ivs specific to the uses, the optimization algorithm later
6007 would be likely to get stuck in a local minimum, thus causing us to create
6008 too many ivs. The approach from few ivs to more seems more likely to be
6009 successful -- starting from few ivs, replacing an expensive use by a
6010 specific iv should always be a win. */
6011 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6013 cand
= iv_cand (data
, i
);
6015 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6018 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6021 if (iv_ca_cand_used_p (ivs
, cand
))
6024 cp
= get_use_iv_cost (data
, use
, cand
);
6028 iv_ca_set_cp (data
, ivs
, use
, cp
);
6029 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6031 iv_ca_set_no_cp (data
, ivs
, use
);
6032 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6034 if (compare_costs (act_cost
, best_cost
) < 0)
6036 best_cost
= act_cost
;
6038 iv_ca_delta_free (&best_delta
);
6039 best_delta
= act_delta
;
6042 iv_ca_delta_free (&act_delta
);
6045 if (infinite_cost_p (best_cost
))
6047 for (i
= 0; i
< use
->n_map_members
; i
++)
6049 cp
= use
->cost_map
+ i
;
6054 /* Already tried this. */
6055 if (cand
->important
)
6057 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6059 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6063 if (iv_ca_cand_used_p (ivs
, cand
))
6067 iv_ca_set_cp (data
, ivs
, use
, cp
);
6068 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6069 iv_ca_set_no_cp (data
, ivs
, use
);
6070 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6073 if (compare_costs (act_cost
, best_cost
) < 0)
6075 best_cost
= act_cost
;
6078 iv_ca_delta_free (&best_delta
);
6079 best_delta
= act_delta
;
6082 iv_ca_delta_free (&act_delta
);
6086 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6087 iv_ca_delta_free (&best_delta
);
6089 return !infinite_cost_p (best_cost
);
6092 /* Finds an initial assignment of candidates to uses. */
6094 static struct iv_ca
*
6095 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6097 struct iv_ca
*ivs
= iv_ca_new (data
);
6100 for (i
= 0; i
< n_iv_uses (data
); i
++)
6101 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6110 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6111 points to a bool variable, this function tries to break local
6112 optimal fixed-point by replacing candidates in IVS if it's true. */
6115 try_improve_iv_set (struct ivopts_data
*data
,
6116 struct iv_ca
*ivs
, bool *try_replace_p
)
6119 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6120 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6121 struct iv_cand
*cand
;
6123 /* Try extending the set of induction variables by one. */
6124 for (i
= 0; i
< n_iv_cands (data
); i
++)
6126 cand
= iv_cand (data
, i
);
6128 if (iv_ca_cand_used_p (ivs
, cand
))
6131 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6135 /* If we successfully added the candidate and the set is small enough,
6136 try optimizing it by removing other candidates. */
6137 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6139 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6140 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6141 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6142 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6145 if (compare_costs (acost
, best_cost
) < 0)
6148 iv_ca_delta_free (&best_delta
);
6149 best_delta
= act_delta
;
6152 iv_ca_delta_free (&act_delta
);
6157 /* Try removing the candidates from the set instead. */
6158 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6160 if (!best_delta
&& *try_replace_p
)
6162 *try_replace_p
= false;
6163 /* So far candidate selecting algorithm tends to choose fewer IVs
6164 so that it can handle cases in which loops have many variables
6165 but the best choice is often to use only one general biv. One
6166 weakness is it can't handle opposite cases, in which different
6167 candidates should be chosen with respect to each use. To solve
6168 the problem, we replace candidates in a manner described by the
6169 comments of iv_ca_replace, thus give general algorithm a chance
6170 to break local optimal fixed-point in these cases. */
6171 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6178 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6179 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6180 iv_ca_delta_free (&best_delta
);
6184 /* Attempts to find the optimal set of induction variables. We do simple
6185 greedy heuristic -- we try to replace at most one candidate in the selected
6186 solution and remove the unused ivs while this improves the cost. */
6188 static struct iv_ca
*
6189 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6192 bool try_replace_p
= true;
6194 /* Get the initial solution. */
6195 set
= get_initial_solution (data
, originalp
);
6198 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6199 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6205 fprintf (dump_file
, "Initial set of candidates:\n");
6206 iv_ca_dump (data
, dump_file
, set
);
6209 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6213 fprintf (dump_file
, "Improved to:\n");
6214 iv_ca_dump (data
, dump_file
, set
);
6221 static struct iv_ca
*
6222 find_optimal_iv_set (struct ivopts_data
*data
)
6225 struct iv_ca
*set
, *origset
;
6227 comp_cost cost
, origcost
;
6229 /* Determine the cost based on a strategy that starts with original IVs,
6230 and try again using a strategy that prefers candidates not based
6232 origset
= find_optimal_iv_set_1 (data
, true);
6233 set
= find_optimal_iv_set_1 (data
, false);
6235 if (!origset
&& !set
)
6238 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6239 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6243 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6244 origcost
.cost
, origcost
.complexity
);
6245 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6246 cost
.cost
, cost
.complexity
);
6249 /* Choose the one with the best cost. */
6250 if (compare_costs (origcost
, cost
) <= 0)
6257 iv_ca_free (&origset
);
6259 for (i
= 0; i
< n_iv_uses (data
); i
++)
6261 use
= iv_use (data
, i
);
6262 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6268 /* Creates a new induction variable corresponding to CAND. */
6271 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6273 gimple_stmt_iterator incr_pos
;
6283 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6287 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6295 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6299 /* Mark that the iv is preserved. */
6300 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6301 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6303 /* Rewrite the increment so that it uses var_before directly. */
6304 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6308 gimple_add_tmp_var (cand
->var_before
);
6310 base
= unshare_expr (cand
->iv
->base
);
6312 create_iv (base
, unshare_expr (cand
->iv
->step
),
6313 cand
->var_before
, data
->current_loop
,
6314 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6317 /* Creates new induction variables described in SET. */
6320 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6323 struct iv_cand
*cand
;
6326 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6328 cand
= iv_cand (data
, i
);
6329 create_new_iv (data
, cand
);
6332 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6334 fprintf (dump_file
, "\nSelected IV set: \n");
6335 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6337 cand
= iv_cand (data
, i
);
6338 dump_cand (dump_file
, cand
);
6340 fprintf (dump_file
, "\n");
6344 /* Rewrites USE (definition of iv used in a nonlinear expression)
6345 using candidate CAND. */
6348 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6349 struct iv_use
*use
, struct iv_cand
*cand
)
6354 gimple_stmt_iterator bsi
;
6356 /* An important special case -- if we are asked to express value of
6357 the original iv by itself, just exit; there is no need to
6358 introduce a new computation (that might also need casting the
6359 variable to unsigned and back). */
6360 if (cand
->pos
== IP_ORIGINAL
6361 && cand
->incremented_at
== use
->stmt
)
6363 enum tree_code stmt_code
;
6365 gcc_assert (is_gimple_assign (use
->stmt
));
6366 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6368 /* Check whether we may leave the computation unchanged.
6369 This is the case only if it does not rely on other
6370 computations in the loop -- otherwise, the computation
6371 we rely upon may be removed in remove_unused_ivs,
6372 thus leading to ICE. */
6373 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6374 if (stmt_code
== PLUS_EXPR
6375 || stmt_code
== MINUS_EXPR
6376 || stmt_code
== POINTER_PLUS_EXPR
)
6378 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6379 op
= gimple_assign_rhs2 (use
->stmt
);
6380 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6381 op
= gimple_assign_rhs1 (use
->stmt
);
6388 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6392 comp
= get_computation (data
->current_loop
, use
, cand
);
6393 gcc_assert (comp
!= NULL_TREE
);
6395 switch (gimple_code (use
->stmt
))
6398 tgt
= PHI_RESULT (use
->stmt
);
6400 /* If we should keep the biv, do not replace it. */
6401 if (name_info (data
, tgt
)->preserve_biv
)
6404 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6408 tgt
= gimple_assign_lhs (use
->stmt
);
6409 bsi
= gsi_for_stmt (use
->stmt
);
6416 if (!valid_gimple_rhs_p (comp
)
6417 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6418 /* We can't allow re-allocating the stmt as it might be pointed
6420 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6421 >= gimple_num_ops (gsi_stmt (bsi
)))))
6423 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6424 true, GSI_SAME_STMT
);
6425 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6427 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6428 /* As this isn't a plain copy we have to reset alignment
6430 if (SSA_NAME_PTR_INFO (comp
))
6431 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6435 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6437 ass
= gimple_build_assign (tgt
, comp
);
6438 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6440 bsi
= gsi_for_stmt (use
->stmt
);
6441 remove_phi_node (&bsi
, false);
6445 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6446 use
->stmt
= gsi_stmt (bsi
);
6450 /* Performs a peephole optimization to reorder the iv update statement with
6451 a mem ref to enable instruction combining in later phases. The mem ref uses
6452 the iv value before the update, so the reordering transformation requires
6453 adjustment of the offset. CAND is the selected IV_CAND.
6457 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6465 directly propagating t over to (1) will introduce overlapping live range
6466 thus increase register pressure. This peephole transform it into:
6470 t = MEM_REF (base, iv2, 8, 8);
6477 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6480 gimple iv_update
, stmt
;
6482 gimple_stmt_iterator gsi
, gsi_iv
;
6484 if (cand
->pos
!= IP_NORMAL
)
6487 var_after
= cand
->var_after
;
6488 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6490 bb
= gimple_bb (iv_update
);
6491 gsi
= gsi_last_nondebug_bb (bb
);
6492 stmt
= gsi_stmt (gsi
);
6494 /* Only handle conditional statement for now. */
6495 if (gimple_code (stmt
) != GIMPLE_COND
)
6498 gsi_prev_nondebug (&gsi
);
6499 stmt
= gsi_stmt (gsi
);
6500 if (stmt
!= iv_update
)
6503 gsi_prev_nondebug (&gsi
);
6504 if (gsi_end_p (gsi
))
6507 stmt
= gsi_stmt (gsi
);
6508 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6511 if (stmt
!= use
->stmt
)
6514 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6517 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6519 fprintf (dump_file
, "Reordering \n");
6520 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6521 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6522 fprintf (dump_file
, "\n");
6525 gsi
= gsi_for_stmt (use
->stmt
);
6526 gsi_iv
= gsi_for_stmt (iv_update
);
6527 gsi_move_before (&gsi_iv
, &gsi
);
6529 cand
->pos
= IP_BEFORE_USE
;
6530 cand
->incremented_at
= use
->stmt
;
6533 /* Rewrites USE (address that is an iv) using candidate CAND. */
6536 rewrite_use_address (struct ivopts_data
*data
,
6537 struct iv_use
*use
, struct iv_cand
*cand
)
6540 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6541 tree base_hint
= NULL_TREE
;
6545 adjust_iv_update_pos (cand
, use
);
6546 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6548 unshare_aff_combination (&aff
);
6550 /* To avoid undefined overflow problems, all IV candidates use unsigned
6551 integer types. The drawback is that this makes it impossible for
6552 create_mem_ref to distinguish an IV that is based on a memory object
6553 from one that represents simply an offset.
6555 To work around this problem, we pass a hint to create_mem_ref that
6556 indicates which variable (if any) in aff is an IV based on a memory
6557 object. Note that we only consider the candidate. If this is not
6558 based on an object, the base of the reference is in some subexpression
6559 of the use -- but these will use pointer types, so they are recognized
6560 by the create_mem_ref heuristics anyway. */
6561 if (cand
->iv
->base_object
)
6562 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6564 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6565 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6566 reference_alias_ptr_type (*use
->op_p
),
6567 iv
, base_hint
, data
->speed
);
6568 copy_ref_info (ref
, *use
->op_p
);
6572 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6576 rewrite_use_compare (struct ivopts_data
*data
,
6577 struct iv_use
*use
, struct iv_cand
*cand
)
6579 tree comp
, *var_p
, op
, bound
;
6580 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6581 enum tree_code compare
;
6582 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6588 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6589 tree var_type
= TREE_TYPE (var
);
6592 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6594 fprintf (dump_file
, "Replacing exit test: ");
6595 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6598 bound
= unshare_expr (fold_convert (var_type
, bound
));
6599 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6601 gsi_insert_seq_on_edge_immediate (
6602 loop_preheader_edge (data
->current_loop
),
6605 gimple_cond_set_lhs (use
->stmt
, var
);
6606 gimple_cond_set_code (use
->stmt
, compare
);
6607 gimple_cond_set_rhs (use
->stmt
, op
);
6611 /* The induction variable elimination failed; just express the original
6613 comp
= get_computation (data
->current_loop
, use
, cand
);
6614 gcc_assert (comp
!= NULL_TREE
);
6616 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6619 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6620 true, GSI_SAME_STMT
);
6623 /* Rewrites USE using candidate CAND. */
6626 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6630 case USE_NONLINEAR_EXPR
:
6631 rewrite_use_nonlinear_expr (data
, use
, cand
);
6635 rewrite_use_address (data
, use
, cand
);
6639 rewrite_use_compare (data
, use
, cand
);
6646 update_stmt (use
->stmt
);
6649 /* Rewrite the uses using the selected induction variables. */
6652 rewrite_uses (struct ivopts_data
*data
)
6655 struct iv_cand
*cand
;
6658 for (i
= 0; i
< n_iv_uses (data
); i
++)
6660 use
= iv_use (data
, i
);
6661 cand
= use
->selected
;
6664 rewrite_use (data
, use
, cand
);
6668 /* Removes the ivs that are not used after rewriting. */
6671 remove_unused_ivs (struct ivopts_data
*data
)
6675 bitmap toremove
= BITMAP_ALLOC (NULL
);
6677 /* Figure out an order in which to release SSA DEFs so that we don't
6678 release something that we'd have to propagate into a debug stmt
6680 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6682 struct version_info
*info
;
6684 info
= ver_info (data
, j
);
6686 && !integer_zerop (info
->iv
->step
)
6688 && !info
->iv
->have_use_for
6689 && !info
->preserve_biv
)
6691 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6693 tree def
= info
->iv
->ssa_name
;
6695 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6697 imm_use_iterator imm_iter
;
6698 use_operand_p use_p
;
6702 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6704 if (!gimple_debug_bind_p (stmt
))
6707 /* We just want to determine whether to do nothing
6708 (count == 0), to substitute the computed
6709 expression into a single use of the SSA DEF by
6710 itself (count == 1), or to use a debug temp
6711 because the SSA DEF is used multiple times or as
6712 part of a larger expression (count > 1). */
6714 if (gimple_debug_bind_get_value (stmt
) != def
)
6718 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6724 struct iv_use dummy_use
;
6725 struct iv_cand
*best_cand
= NULL
, *cand
;
6726 unsigned i
, best_pref
= 0, cand_pref
;
6728 memset (&dummy_use
, 0, sizeof (dummy_use
));
6729 dummy_use
.iv
= info
->iv
;
6730 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6732 cand
= iv_use (data
, i
)->selected
;
6733 if (cand
== best_cand
)
6735 cand_pref
= operand_equal_p (cand
->iv
->step
,
6739 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6740 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6743 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6745 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6748 best_pref
= cand_pref
;
6755 tree comp
= get_computation_at (data
->current_loop
,
6756 &dummy_use
, best_cand
,
6757 SSA_NAME_DEF_STMT (def
));
6763 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6764 DECL_ARTIFICIAL (vexpr
) = 1;
6765 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6766 if (SSA_NAME_VAR (def
))
6767 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6769 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6770 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6771 gimple_stmt_iterator gsi
;
6773 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6774 gsi
= gsi_after_labels (gimple_bb
6775 (SSA_NAME_DEF_STMT (def
)));
6777 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6779 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6783 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6785 if (!gimple_debug_bind_p (stmt
))
6788 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6789 SET_USE (use_p
, comp
);
6797 release_defs_bitset (toremove
);
6799 BITMAP_FREE (toremove
);
6802 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6803 for pointer_map_traverse. */
6806 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6807 void *data ATTRIBUTE_UNUSED
)
6809 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6815 /* Frees data allocated by the optimization of a single loop. */
6818 free_loop_data (struct ivopts_data
*data
)
6826 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6827 pointer_map_destroy (data
->niters
);
6828 data
->niters
= NULL
;
6831 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6833 struct version_info
*info
;
6835 info
= ver_info (data
, i
);
6838 info
->has_nonlin_use
= false;
6839 info
->preserve_biv
= false;
6842 bitmap_clear (data
->relevant
);
6843 bitmap_clear (data
->important_candidates
);
6845 for (i
= 0; i
< n_iv_uses (data
); i
++)
6847 struct iv_use
*use
= iv_use (data
, i
);
6850 BITMAP_FREE (use
->related_cands
);
6851 for (j
= 0; j
< use
->n_map_members
; j
++)
6852 if (use
->cost_map
[j
].depends_on
)
6853 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6854 free (use
->cost_map
);
6857 data
->iv_uses
.truncate (0);
6859 for (i
= 0; i
< n_iv_cands (data
); i
++)
6861 struct iv_cand
*cand
= iv_cand (data
, i
);
6864 if (cand
->depends_on
)
6865 BITMAP_FREE (cand
->depends_on
);
6868 data
->iv_candidates
.truncate (0);
6870 if (data
->version_info_size
< num_ssa_names
)
6872 data
->version_info_size
= 2 * num_ssa_names
;
6873 free (data
->version_info
);
6874 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6877 data
->max_inv_id
= 0;
6879 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6880 SET_DECL_RTL (obj
, NULL_RTX
);
6882 decl_rtl_to_reset
.truncate (0);
6884 data
->inv_expr_tab
.empty ();
6885 data
->inv_expr_id
= 0;
6888 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6892 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6894 free_loop_data (data
);
6895 free (data
->version_info
);
6896 BITMAP_FREE (data
->relevant
);
6897 BITMAP_FREE (data
->important_candidates
);
6899 decl_rtl_to_reset
.release ();
6900 data
->iv_uses
.release ();
6901 data
->iv_candidates
.release ();
6902 data
->inv_expr_tab
.dispose ();
6905 /* Returns true if the loop body BODY includes any function calls. */
6908 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6910 gimple_stmt_iterator gsi
;
6913 for (i
= 0; i
< num_nodes
; i
++)
6914 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6916 gimple stmt
= gsi_stmt (gsi
);
6917 if (is_gimple_call (stmt
)
6918 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6924 /* Optimizes the LOOP. Returns true if anything changed. */
6927 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6929 bool changed
= false;
6930 struct iv_ca
*iv_ca
;
6931 edge exit
= single_dom_exit (loop
);
6934 gcc_assert (!data
->niters
);
6935 data
->current_loop
= loop
;
6936 data
->speed
= optimize_loop_for_speed_p (loop
);
6938 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6940 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6944 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6945 exit
->src
->index
, exit
->dest
->index
);
6946 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6947 fprintf (dump_file
, "\n");
6950 fprintf (dump_file
, "\n");
6953 body
= get_loop_body (loop
);
6954 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6955 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6958 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6960 /* For each ssa name determines whether it behaves as an induction variable
6962 if (!find_induction_variables (data
))
6965 /* Finds interesting uses (item 1). */
6966 find_interesting_uses (data
);
6967 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6970 /* Finds candidates for the induction variables (item 2). */
6971 find_iv_candidates (data
);
6973 /* Calculates the costs (item 3, part 1). */
6974 determine_iv_costs (data
);
6975 determine_use_iv_costs (data
);
6976 determine_set_costs (data
);
6978 /* Find the optimal set of induction variables (item 3, part 2). */
6979 iv_ca
= find_optimal_iv_set (data
);
6984 /* Create the new induction variables (item 4, part 1). */
6985 create_new_ivs (data
, iv_ca
);
6986 iv_ca_free (&iv_ca
);
6988 /* Rewrite the uses (item 4, part 2). */
6989 rewrite_uses (data
);
6991 /* Remove the ivs that are unused after rewriting. */
6992 remove_unused_ivs (data
);
6994 /* We have changed the structure of induction variables; it might happen
6995 that definitions in the scev database refer to some of them that were
7000 free_loop_data (data
);
7005 /* Main entry point. Optimizes induction variables in loops. */
7008 tree_ssa_iv_optimize (void)
7011 struct ivopts_data data
;
7013 tree_ssa_iv_optimize_init (&data
);
7015 /* Optimize the loops starting with the innermost ones. */
7016 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7019 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7021 tree_ssa_iv_optimize_loop (&data
, loop
);
7024 tree_ssa_iv_optimize_finalize (&data
);