/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
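/* For illustration only (not part of the original comment): on a typical
   target, a loop such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   recomputes the address &a[i] from the counter on every iteration.  After
   this pass selects a pointer candidate, the loop would usually end up
   looking roughly like

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   i.e. the multiplication by the element size is strength-reduced into a
   pointer increment and the exit test compares the new candidate.  The
   exact shape of the result depends on the target cost model.  */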
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations assumed when no profile data
   is available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use_p;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      break;
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (POINTER_TYPE_P (type))
	step = convert_to_ptrofftype (step);
      else
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  if (!top)
    return false;

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;

	  if (civ->step)
	    {
	      astep = civ->step;
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}
      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
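/* Illustration only (not part of the original sources): strip_offset splits
   a constant displacement off an address computation, e.g. for an expression
   equivalent to

       p + 24      (POINTER_PLUS_EXPR with a constant second operand)

   it returns the tree for "p" and stores 24 in *OFFSET.  This is what
   add_iv_value_candidates below relies on when it also registers the
   offset-free form of an iv base as a separate candidate.  */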
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation of the
   value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
2512 /* Adds candidates bases on the old induction variable IV. */
2515 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2519 struct iv_cand
*cand
;
2521 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2523 /* The same, but with initial value zero. */
2524 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2525 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2527 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2528 iv
->step
, true, NULL
);
2530 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2531 if (gimple_code (phi
) == GIMPLE_PHI
)
2533 /* Additionally record the possibility of leaving the original iv
2535 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2536 /* Don't add candidate if it's from another PHI node because
2537 it's an affine iv appearing in the form of PEELED_CHREC. */
2538 phi
= SSA_NAME_DEF_STMT (def
);
2539 if (gimple_code (phi
) != GIMPLE_PHI
)
2541 cand
= add_candidate_1 (data
,
2542 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2543 SSA_NAME_DEF_STMT (def
));
2544 cand
->var_before
= iv
->ssa_name
;
2545 cand
->var_after
= def
;
2548 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2552 /* Adds candidates based on the old induction variables. */
2555 add_old_ivs_candidates (struct ivopts_data
*data
)
2561 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2563 iv
= ver_info (data
, i
)->iv
;
2564 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2565 add_old_iv_candidates (data
, iv
);
2569 /* Adds candidates based on the value of the induction variable IV and USE. */
2572 add_iv_value_candidates (struct ivopts_data
*data
,
2573 struct iv
*iv
, struct iv_use
*use
)
2575 unsigned HOST_WIDE_INT offset
;
2579 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2581 /* The same, but with initial value zero. Make such variable important,
2582 since it is generic enough so that possibly many uses may be based
2584 basetype
= TREE_TYPE (iv
->base
);
2585 if (POINTER_TYPE_P (basetype
))
2586 basetype
= sizetype
;
2587 add_candidate (data
, build_int_cst (basetype
, 0),
2588 iv
->step
, true, use
);
2590 /* Third, try removing the constant offset. Make sure to even
2591 add a candidate for &a[0] vs. (T *)&a. */
2592 base
= strip_offset (iv
->base
, &offset
);
2594 || base
!= iv
->base
)
2595 add_candidate (data
, base
, iv
->step
, false, use
);
2598 /* Adds candidates based on the uses. */
2601 add_derived_ivs_candidates (struct ivopts_data
*data
)
2605 for (i
= 0; i
< n_iv_uses (data
); i
++)
2607 struct iv_use
*use
= iv_use (data
, i
);
2614 case USE_NONLINEAR_EXPR
:
2617 /* Just add the ivs based on the value of the iv used here. */
2618 add_iv_value_candidates (data
, use
->iv
, use
);
2627 /* Record important candidates and add them to related_cands bitmaps
2631 record_important_candidates (struct ivopts_data
*data
)
2636 for (i
= 0; i
< n_iv_cands (data
); i
++)
2638 struct iv_cand
*cand
= iv_cand (data
, i
);
2640 if (cand
->important
)
2641 bitmap_set_bit (data
->important_candidates
, i
);
2644 data
->consider_all_candidates
= (n_iv_cands (data
)
2645 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2647 if (data
->consider_all_candidates
)
2649 /* We will not need "related_cands" bitmaps in this case,
2650 so release them to decrease peak memory consumption. */
2651 for (i
= 0; i
< n_iv_uses (data
); i
++)
2653 use
= iv_use (data
, i
);
2654 BITMAP_FREE (use
->related_cands
);
2659 /* Add important candidates to the related_cands bitmaps. */
2660 for (i
= 0; i
< n_iv_uses (data
); i
++)
2661 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2662 data
->important_candidates
);
2666 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2667 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2668 we allocate a simple list to every use. */
2671 alloc_use_cost_map (struct ivopts_data
*data
)
2673 unsigned i
, size
, s
;
2675 for (i
= 0; i
< n_iv_uses (data
); i
++)
2677 struct iv_use
*use
= iv_use (data
, i
);
2679 if (data
->consider_all_candidates
)
2680 size
= n_iv_cands (data
);
2683 s
= bitmap_count_bits (use
->related_cands
);
2685 /* Round up to the power of two, so that moduling by it is fast. */
2686 size
= s
? (1 << ceil_log2 (s
)) : 1;
2689 use
->n_map_members
= size
;
2690 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
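
/* For instance, a use related to 5 candidates gets n_map_members
   == 1 << ceil_log2 (5) == 8, so the "cand->id & (n_map_members - 1)"
   hashing in set_use_iv_cost and get_use_iv_cost below reduces to a
   cheap bit mask instead of a division.  */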
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

new_cost (unsigned runtime, unsigned complexity)
  cost.cost = runtime;
  cost.complexity = complexity;

/* Adds costs COST1 and COST2.  */

add_costs (comp_cost cost1, comp_cost cost2)
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

/* Subtracts costs COST1 and COST2.  */

sub_costs (comp_cost cost1, comp_cost cost2)
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

compare_costs (comp_cost cost1, comp_cost cost2)
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;

/* Returns true if COST is infinite.  */

infinite_cost_p (comp_cost cost)
  return cost.cost == INFTY;

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
  if (infinite_cost_p (cost))
      BITMAP_FREE (depends_on);

  if (data->consider_all_candidates)
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)

  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
  struct cost_pair *ret;

  if (data->consider_all_candidates)
      ret = use->cost_map + cand->id;

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)

/* Returns estimate on cost of computing SEQ.  */

seq_cost (rtx seq, bool speed)
  for (; seq; seq = NEXT_INSN (seq))
      set = single_set (seq);
        cost += set_src_cost (SET_SRC (set), speed);

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

produce_memory_decl_rtl (tree obj, int *regno)
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);

  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

prepare_decl_rtl (tree *expr_p, int *ws, void *data)
  tree obj = NULL_TREE;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);

      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      if (DECL_RTL_SET_P (obj))
      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

  decl_rtl_to_reset.safe_push (obj);
  SET_DECL_RTL (obj, x);

/* Determines cost of the computation of EXPR.  */

computation_cost (tree expr, bool speed)
  tree type = TREE_TYPE (expr);
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

/* Returns variable containing the value of candidate CAND at statement AT.  */

var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;

  return cand->var_before;

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the ...  */

determine_common_wider_type (tree *a, tree *b)
  tree wider_type = NULL;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))

  if (CONVERT_EXPR_P (*b))
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct aff_tree *aff)
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  aff_tree cbase_aff, var_aff;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      /* We do not have a precision to express the values of use.  */

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);

  if (!constant_multiple_of (ustep, cstep, &rat))

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE ...  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);
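
/* As a sketch: for a use with base 4 and step 4 and a candidate with
   base 0 and step 2, constant_multiple_of yields rat == 2, so the use
   is rebuilt as 4 - 2 * 0 + 2 * var, i.e. the affine combination
   2 * var + 4.  */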
/* Return the type of USE.  */

get_use_type (struct iv_use *use)
  tree base_type = TREE_TYPE (use->iv->base);

  if (use->type == USE_ADDRESS)
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
  return get_computation_at (loop, use, cand, use->stmt);

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

adjust_setup_cost (struct ivopts_data *data, unsigned cost)
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
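
/* E.g. a setup cost of 20 in a loop averaging 10 iterations contributes
   20 / 10 == 2 to the per-iteration cost when optimizing for speed.  */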
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);

      if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");

      valid_mult_list[data_index] = valid_mult;

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
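
/* The valid-multiplier bitmap is cached per (address space, mode) pair,
   so repeated queries, e.g. for ratio 4 in SImode, probe the target's
   addressing modes only once; ratios outside [-MAX_RATIO, MAX_RATIO]
   are rejected without consulting the cache.  */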
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */

typedef struct address_cost_data_s
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;

get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
      data->min_offset = (i == -1 ? 0 : off);

      for (i = width; i >= 0; i--)
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),

      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))

      /* Compute the cost of various addressing modes.  */
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_predec[mem_mode])
            data->ainc_costs[AINC_PRE_DEC]
              = address_cost (addr, mem_mode, as, speed);
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postdec[mem_mode])
            data->ainc_costs[AINC_POST_DEC]
              = address_cost (addr, mem_mode, as, speed);
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_preinc[mem_mode])
            data->ainc_costs[AINC_PRE_INC]
              = address_cost (addr, mem_mode, as, speed);
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postinc[mem_mode])
            data->ainc_costs[AINC_POST_INC]
              = address_cost (addr, mem_mode, as, speed);
      for (i = 0; i < 16; i++)
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

                base = gen_rtx_fmt_e (CONST, address_mode,
                                      (PLUS, address_mode, base,
                                       gen_int_mode (off, address_mode)));
            base = gen_int_mode (off, address_mode);

            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          /* To avoid splitting addressing modes, pretend that no cse will ...  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          data->costs[sym_p][var_p][off_p][rat_p] = acost;

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;

      if (dump_file && (dump_flags & TDF_DETAILS))
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
                fprintf (dump_file, "sym + ");
                fprintf (dump_file, "var + ");
                fprintf (dump_file, "cst + ");
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");

      address_cost_data_list[data_index] = data;

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  if ((offset >> (bits - 1) & 1))

  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
  if (has_postinc[mem_mode] && autoinc_offset == 0
    autoinc_type = AINC_POST_INC;
  else if (has_postdec[mem_mode] && autoinc_offset == 0
    autoinc_type = AINC_POST_DEC;
  else if (has_preinc[mem_mode] && autoinc_offset == msize
    autoinc_type = AINC_PRE_INC;
  else if (has_predec[mem_mode] && autoinc_offset == -msize
    autoinc_type = AINC_PRE_DEC;
  if (autoinc_type != AINC_NONE)

  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

    *may_autoinc = autoinc;
    acost = data->ainc_costs[autoinc_type];
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);

/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))

  if (operand_equal_p (op1, mult, 0))

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
               ? shiftsub1_cost (speed, mode, m)
               : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));
3595 force_expr_to_var_cost (tree expr
, bool speed
)
3597 static bool costs_initialized
= false;
3598 static unsigned integer_cost
[2];
3599 static unsigned symbol_cost
[2];
3600 static unsigned address_cost
[2];
3602 comp_cost cost0
, cost1
, cost
;
3603 enum machine_mode mode
;
3605 if (!costs_initialized
)
3607 tree type
= build_pointer_type (integer_type_node
);
3612 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3613 TREE_STATIC (var
) = 1;
3614 x
= produce_memory_decl_rtl (var
, NULL
);
3615 SET_DECL_RTL (var
, x
);
3617 addr
= build1 (ADDR_EXPR
, type
, var
);
3620 for (i
= 0; i
< 2; i
++)
3622 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3625 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3628 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3629 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3631 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3632 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3633 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3634 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3635 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3636 fprintf (dump_file
, "\n");
3640 costs_initialized
= true;
3645 if (SSA_VAR_P (expr
))
3648 if (is_gimple_min_invariant (expr
))
3650 if (TREE_CODE (expr
) == INTEGER_CST
)
3651 return new_cost (integer_cost
[speed
], 0);
3653 if (TREE_CODE (expr
) == ADDR_EXPR
)
3655 tree obj
= TREE_OPERAND (expr
, 0);
3657 if (TREE_CODE (obj
) == VAR_DECL
3658 || TREE_CODE (obj
) == PARM_DECL
3659 || TREE_CODE (obj
) == RESULT_DECL
)
3660 return new_cost (symbol_cost
[speed
], 0);
3663 return new_cost (address_cost
[speed
], 0);
3666 switch (TREE_CODE (expr
))
3668 case POINTER_PLUS_EXPR
:
3672 op0
= TREE_OPERAND (expr
, 0);
3673 op1
= TREE_OPERAND (expr
, 1);
3680 op0
= TREE_OPERAND (expr
, 0);
3686 /* Just an arbitrary value, FIXME. */
3687 return new_cost (target_spill_cost
[speed
], 0);
3690 if (op0
== NULL_TREE
3691 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3694 cost0
= force_expr_to_var_cost (op0
, speed
);
3696 if (op1
== NULL_TREE
3697 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3700 cost1
= force_expr_to_var_cost (op1
, speed
);
3702 mode
= TYPE_MODE (TREE_TYPE (expr
));
3703 switch (TREE_CODE (expr
))
3705 case POINTER_PLUS_EXPR
:
3709 cost
= new_cost (add_cost (speed
, mode
), 0);
3710 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3712 tree mult
= NULL_TREE
;
3714 if (TREE_CODE (op1
) == MULT_EXPR
)
3716 else if (TREE_CODE (op0
) == MULT_EXPR
)
3719 if (mult
!= NULL_TREE
3720 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3721 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3729 tree inner_mode
, outer_mode
;
3730 outer_mode
= TREE_TYPE (expr
);
3731 inner_mode
= TREE_TYPE (op0
);
3732 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3733 TYPE_MODE (inner_mode
), speed
), 0);
3738 if (cst_and_fits_in_hwi (op0
))
3739 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3741 else if (cst_and_fits_in_hwi (op1
))
3742 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3745 return new_cost (target_spill_cost
[speed
], 0);
3752 cost
= add_costs (cost
, cost0
);
3753 cost
= add_costs (cost
, cost1
);
3755 /* Bound the cost by target_spill_cost. The parts of complicated
3756 computations often are either loop invariant or at least can
3757 be shared between several iv uses, so letting this grow without
3758 limits would not give reasonable results. */
3759 if (cost
.cost
> (int) target_spill_cost
[speed
])
3760 cost
.cost
= target_spill_cost
[speed
];
3765 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3766 invariants the computation depends on. */
3769 force_var_cost (struct ivopts_data
*data
,
3770 tree expr
, bitmap
*depends_on
)
3774 fd_ivopts_data
= data
;
3775 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3778 return force_expr_to_var_cost (expr
, data
->speed
);
3781 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3782 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3783 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3787 split_address_cost (struct ivopts_data
*data
,
3788 tree addr
, bool *symbol_present
, bool *var_present
,
3789 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3792 HOST_WIDE_INT bitsize
;
3793 HOST_WIDE_INT bitpos
;
3795 enum machine_mode mode
;
3796 int unsignedp
, volatilep
;
3798 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3799 &unsignedp
, &volatilep
, false);
3802 || bitpos
% BITS_PER_UNIT
!= 0
3803 || TREE_CODE (core
) != VAR_DECL
)
3805 *symbol_present
= false;
3806 *var_present
= true;
3807 fd_ivopts_data
= data
;
3808 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3809 return new_cost (target_spill_cost
[data
->speed
], 0);
3812 *offset
+= bitpos
/ BITS_PER_UNIT
;
3813 if (TREE_STATIC (core
)
3814 || DECL_EXTERNAL (core
))
3816 *symbol_present
= true;
3817 *var_present
= false;
3821 *symbol_present
= false;
3822 *var_present
= true;
3826 /* Estimates cost of expressing difference of addresses E1 - E2 as
3827 var + symbol + offset. The value of offset is added to OFFSET,
3828 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3829 part is missing. DEPENDS_ON is a set of the invariants the computation
3833 ptr_difference_cost (struct ivopts_data
*data
,
3834 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3835 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3837 HOST_WIDE_INT diff
= 0;
3838 aff_tree aff_e1
, aff_e2
;
3841 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3843 if (ptr_difference_const (e1
, e2
, &diff
))
3846 *symbol_present
= false;
3847 *var_present
= false;
3851 if (integer_zerop (e2
))
3852 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3853 symbol_present
, var_present
, offset
, depends_on
);
3855 *symbol_present
= false;
3856 *var_present
= true;
3858 type
= signed_type_for (TREE_TYPE (e1
));
3859 tree_to_aff_combination (e1
, type
, &aff_e1
);
3860 tree_to_aff_combination (e2
, type
, &aff_e2
);
3861 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3862 aff_combination_add (&aff_e1
, &aff_e2
);
3864 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3867 /* Estimates cost of expressing difference E1 - E2 as
3868 var + symbol + offset. The value of offset is added to OFFSET,
3869 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3870 part is missing. DEPENDS_ON is a set of the invariants the computation
3874 difference_cost (struct ivopts_data
*data
,
3875 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3876 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3878 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3879 unsigned HOST_WIDE_INT off1
, off2
;
3880 aff_tree aff_e1
, aff_e2
;
3883 e1
= strip_offset (e1
, &off1
);
3884 e2
= strip_offset (e2
, &off2
);
3885 *offset
+= off1
- off2
;
3890 if (TREE_CODE (e1
) == ADDR_EXPR
)
3891 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3892 offset
, depends_on
);
3893 *symbol_present
= false;
3895 if (operand_equal_p (e1
, e2
, 0))
3897 *var_present
= false;
3901 *var_present
= true;
3903 if (integer_zerop (e2
))
3904 return force_var_cost (data
, e1
, depends_on
);
3906 if (integer_zerop (e1
))
3908 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3909 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3913 type
= signed_type_for (TREE_TYPE (e1
));
3914 tree_to_aff_combination (e1
, type
, &aff_e1
);
3915 tree_to_aff_combination (e2
, type
, &aff_e2
);
3916 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3917 aff_combination_add (&aff_e1
, &aff_e2
);
3919 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
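
/* E.g. for E1 == p + 16 and E2 == p, strip_offset moves 16 into *OFFSET,
   the stripped operands compare equal, and the difference itself costs
   nothing beyond the recorded constant offset.  */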
/* Returns true if AFF1 and AFF2 are identical.  */

compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
  if (aff1->n != aff2->n)

  for (i = 0; i < aff1->n; i++)
      if (aff1->elts[i].coef != aff2->elts[i].coef)

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))

/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

get_expr_id (struct ivopts_data *data, tree expr)
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab.find_slot (&ent, INSERT);

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address ...  */

get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
  aff_tree ubase_aff, cbase_aff;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);

  if (((TREE_CODE (ubase) == SSA_NAME)
       || (TREE_CODE (ubase) == ADDR_EXPR
           && is_gimple_min_invariant (ubase)))
      && (TREE_CODE (cbase) == INTEGER_CST))

  if (((TREE_CODE (cbase) == SSA_NAME)
       || (TREE_CODE (cbase) == ADDR_EXPR
           && is_gimple_min_invariant (cbase)))
      && (TREE_CODE (ubase) == INTEGER_CST))

  if (operand_equal_p (ubase, cbase, 0))

  if (TREE_CODE (ubase) == ADDR_EXPR
      && TREE_CODE (cbase) == ADDR_EXPR)
      usym = TREE_OPERAND (ubase, 0);
      csym = TREE_OPERAND (cbase, 0);
      if (TREE_CODE (usym) == ARRAY_REF)
          tree ind = TREE_OPERAND (usym, 1);
          if (TREE_CODE (ind) == INTEGER_CST
              && tree_fits_shwi_p (ind)
              && tree_to_shwi (ind) == 0)
            usym = TREE_OPERAND (usym, 0);
      if (TREE_CODE (csym) == ARRAY_REF)
          tree ind = TREE_OPERAND (csym, 1);
          if (TREE_CODE (ind) == INTEGER_CST
              && tree_fits_shwi_p (ind)
              && tree_to_shwi (ind) == 0)
            csym = TREE_OPERAND (csym, 0);
      if (operand_equal_p (usym, csym, 0))

  /* Now do more complex comparison  */
  tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
  tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
  if (compare_aff_trees (&ubase_aff, &cbase_aff))

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))

  /* Only consider real candidates.  */
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;

      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful ...  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
      /* TODO -- add direct handling of this case.  */

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
    return infinite_cost;

  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
      cost.cost /= avg_loop_niter (data->current_loop);
  else if (ratio == 1)
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
          aff_tree real_cbase_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);

      cost = difference_cost (data,
                              &symbol_present, &var_present, &offset,
      cost.cost /= avg_loop_niter (data->current_loop);
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
               TYPE_ADDR_SPACE (TREE_TYPE (utype))))
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              &symbol_present, &var_present, &offset,
      cost.cost /= avg_loop_niter (data->current_loop);
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));

      get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);

  /* If we are after the increment, the value of the candidate is higher by ...  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
      cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);

    *can_autoinc = false;

    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

      return infinite_cost;

      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);

/* Determines cost of basing replacement of USE on CAND in a generic ...  */

determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of ...  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,

  return !infinite_cost_p (cost);

/* Determines cost of basing replacement of USE on CAND in an address.  */

determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,

  return !infinite_cost_p (cost);

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
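
/* E.g. for a candidate with base 0 and step 4, NITER == n gives
   val == 4 * n, or 4 * n + 4 when AT is after the increment, since the
   candidate has already been stepped once in that iteration.  */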
/* Returns period of induction variable iv.  */

iv_period (struct iv *iv)
  tree step = iv->step, period, type;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step -1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1  ...  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));
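
/* For example, an unsigned 32-bit iv with step 4 has num_ending_zeros
   == 2, so the period is the low-bits mask of 32 - 2 == 30 bits,
   i.e. 0x3fffffff, matching lcm (4, 2^32) / 4 - 1.  */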
4426 /* Returns the comparison operator used when eliminating the iv USE. */
4428 static enum tree_code
4429 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4431 struct loop
*loop
= data
->current_loop
;
4435 ex_bb
= gimple_bb (use
->stmt
);
4436 exit
= EDGE_SUCC (ex_bb
, 0);
4437 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4438 exit
= EDGE_SUCC (ex_bb
, 1);
4440 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4444 strip_wrap_conserving_type_conversions (tree exp
)
4446 while (tree_ssa_useless_type_conversion (exp
)
4447 && (nowrap_type_p (TREE_TYPE (exp
))
4448 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4449 exp
= TREE_OPERAND (exp
, 0);
4453 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4454 check for an exact match. */
4457 expr_equal_p (tree e
, tree what
)
4460 enum tree_code code
;
4462 e
= strip_wrap_conserving_type_conversions (e
);
4463 what
= strip_wrap_conserving_type_conversions (what
);
4465 code
= TREE_CODE (what
);
4466 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4469 if (operand_equal_p (e
, what
, 0))
4472 if (TREE_CODE (e
) != SSA_NAME
)
4475 stmt
= SSA_NAME_DEF_STMT (e
);
4476 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4477 || gimple_assign_rhs_code (stmt
) != code
)
4480 switch (get_gimple_rhs_class (code
))
4482 case GIMPLE_BINARY_RHS
:
4483 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4487 case GIMPLE_UNARY_RHS
:
4488 case GIMPLE_SINGLE_RHS
:
4489 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4495 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4496 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4497 calculation is performed in non-wrapping type.
4499 TODO: More generally, we could test for the situation that
4500 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4501 This would require knowing the sign of OFFSET.
4503 Also, we only look for the first addition in the computation of BASE.
4504 More complex analysis would be better, but introducing it just for
4505 this optimization seems like an overkill. */
4508 difference_cannot_overflow_p (tree base
, tree offset
)
4510 enum tree_code code
;
4513 if (!nowrap_type_p (TREE_TYPE (base
)))
4516 base
= expand_simple_operations (base
);
4518 if (TREE_CODE (base
) == SSA_NAME
)
4520 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4522 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4525 code
= gimple_assign_rhs_code (stmt
);
4526 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4529 e1
= gimple_assign_rhs1 (stmt
);
4530 e2
= gimple_assign_rhs2 (stmt
);
4534 code
= TREE_CODE (base
);
4535 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4537 e1
= TREE_OPERAND (base
, 0);
4538 e2
= TREE_OPERAND (base
, 1);
4541 /* TODO: deeper inspection may be necessary to prove the equality. */
4545 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4546 case POINTER_PLUS_EXPR
:
4547 return expr_equal_p (e2
, offset
);
4554 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4555 comparison with CAND. NITER describes the number of iterations of
4556 the loops. If successful, the comparison in COMP_P is altered accordingly.
4558 We aim to handle the following situation:
4574 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4575 We aim to optimize this to
4583 while (p < p_0 - a + b);
4585 This preserves the correctness, since the pointer arithmetics does not
4586 overflow. More precisely:
4588 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4589 overflow in computing it or the values of p.
4590 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4591 overflow. To prove this, we use the fact that p_0 = base + a. */
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

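/* Worked illustration (not part of the pass; hypothetical numbers chosen for
   exposition).  For the do-while loop sketched in the comment above, take
   a = 2 and b = 7.  Then

     niter->may_be_zero:  a + 1 > b        -> 3 > 7, false
     niter->niter:        b - a - 1        -> 4

   and the affine check "B - A - NITER = 1" computes 7 - 2 - 4 = 1, so the
   pattern is recognized and the NE_EXPR/EQ_EXPR exit test can be rewritten
   as a LT_EXPR (or GT_EXPR for a decreasing candidate) against the bound.  */
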
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

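/* Illustration (hedged; hypothetical types, not from the original sources).
   The period test above guards against the candidate wrapping before the
   exit condition becomes true.  E.g. for a candidate of the shape

     unsigned char iv = 0;              // step 1, period 255
     do { ... iv++; } while (i < n);

   rewriting the exit test into a comparison of iv against its value after
   NITER iterations is only safe when NITER (or, failing a constant, the
   conservative bound from desc->max / max_loop_iterations) does not exceed
   the period; otherwise iv would wrap and the new test could fire early.  */
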
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

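/* Illustration (hedged sketch, not from the original sources).  For a loop
   such as

     for (i = 0; i < n; i++)
       *p++ = 0;

   and candidate p, the "elimination" path above prices rewriting the exit
   test as p < base + n (force_var_cost of the bound, amortized through
   adjust_setup_cost because base + n is loop invariant), while the
   "expression" path prices keeping a comparison equivalent to the original
   one and expressing the compared iv in terms of the candidate.  Whichever
   comp_cost is smaller is recorded via set_use_iv_cost, ties going to
   elimination.  */
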
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

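/* Illustration (hedged; hypothetical target costs, not from the original
   sources).  With add_cost == 1 for the candidate's mode and a base that
   needs one instruction to materialize (COSTS_N_INSNS (1)), the candidate
   cost comes out roughly as

     cost = cost_step + adjust_setup_cost (data, cost_base.cost)
          = 1 + (the setup cost scaled down by the expected trip count),

   plus 1 if the candidate is not an original, user-visible iv, and plus 1
   again if placing its increment would require populating an empty latch.  */
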
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

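/* Illustration (hedged; hypothetical numbers, not from the original sources).
   The total cost of an assignment IVS decomposes as

     cost = cand_use_cost                       // sum over uses of the chosen cost pairs
          + cand_cost                           // sum of the selected candidates' own costs
          + ivopts_global_cost_for_size (data, n_regs + num_used_inv_expr);

   e.g. three uses costing 2 each, two candidates costing 5 each and three
   live values (two candidate registers plus one invariant expression) give
   6 + 10 + the register-pressure estimate for size 3.  */
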
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}