/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
66 #include "coretypes.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
72 #include "tree-flow.h"
74 #include "tree-pass.h"
76 #include "insn-config.h"
77 #include "pointer-set.h"
79 #include "tree-chrec.h"
80 #include "tree-scalar-evolution.h"
83 #include "langhooks.h"
84 #include "tree-affine.h"
86 #include "tree-inline.h"
87 #include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations assumed when no profile
   estimate is available.  */
#define AVG_LOOP_NITER(LOOP) 5
101 /* Returns the expected number of loop iterations for LOOP.
102 The average trip count is computed from profile data if it
105 static inline HOST_WIDE_INT
106 avg_loop_niter (struct loop
*loop
)
108 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
110 return AVG_LOOP_NITER (loop
);
115 /* Representation of the induction variable. */
118 tree base
; /* Initial value of the iv. */
119 tree base_object
; /* A memory object to that the induction variable points. */
120 tree step
; /* Step of the iv (constant only). */
121 tree ssa_name
; /* The ssa name with the value. */
122 bool biv_p
; /* Is it a biv? */
123 bool have_use_for
; /* Do we already have a use for it? */
124 unsigned use_id
; /* The identifier in the use if it is the case. */
127 /* Per-ssa version information (induction variable descriptions, etc.). */
130 tree name
; /* The ssa name. */
131 struct iv
*iv
; /* Induction variable description. */
132 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
133 an expression that is not an induction variable. */
134 bool preserve_biv
; /* For the original biv, whether to preserve it. */
135 unsigned inv_id
; /* Id of an invariant. */
/* Types of uses of induction variables.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;
156 static const comp_cost no_cost
= {0, 0};
157 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
159 /* The candidate - cost pair. */
162 struct iv_cand
*cand
; /* The candidate. */
163 comp_cost cost
; /* The cost. */
164 bitmap depends_on
; /* The list of invariants that have to be
166 tree value
; /* For final value elimination, the expression for
167 the final value of the iv. For iv elimination,
168 the new bound to compare with. */
169 enum tree_code comp
; /* For iv elimination, the comparison. */
170 int inv_expr_id
; /* Loop invariant expression id. */
176 unsigned id
; /* The id of the use. */
177 enum use_type type
; /* Type of the use. */
178 struct iv
*iv
; /* The induction variable it is based on. */
179 gimple stmt
; /* Statement in that it occurs. */
180 tree
*op_p
; /* The place where it occurs. */
181 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
184 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
185 struct cost_pair
*cost_map
;
186 /* The costs wrto the iv candidates. */
188 struct iv_cand
*selected
;
189 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
202 /* The induction variable candidate. */
205 unsigned id
; /* The number of the candidate. */
206 bool important
; /* Whether this is an "important" candidate, i.e. such
207 that it should be considered by all uses. */
208 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
209 gimple incremented_at
;/* For original biv, the statement where it is
211 tree var_before
; /* The variable used for it before increment. */
212 tree var_after
; /* The variable used for it after increment. */
213 struct iv
*iv
; /* The value of the candidate. NULL for
214 "pseudocandidate" used to indicate the possibility
215 to replace the final value of an iv by direct
216 computation of the value. */
217 unsigned cost
; /* Cost of the candidate. */
218 unsigned cost_step
; /* Cost of the candidate's increment operation. */
219 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
220 where it is incremented. */
221 bitmap depends_on
; /* The list of invariants that are used in step of the
225 /* Loop invariant expression hashtable entry. */
226 struct iv_inv_expr_ent
233 /* The data used by the induction variable optimizations. */
235 typedef struct iv_use
*iv_use_p
;
237 typedef struct iv_cand
*iv_cand_p
;
241 /* The currently optimized loop. */
242 struct loop
*current_loop
;
244 /* Numbers of iterations for all exits of the current loop. */
245 struct pointer_map_t
*niters
;
247 /* Number of registers used in it. */
250 /* The size of version_info array allocated. */
251 unsigned version_info_size
;
253 /* The array of information for the ssa names. */
254 struct version_info
*version_info
;
256 /* The hashtable of loop invariant expressions created
260 /* Loop invariant expression id. */
263 /* The bitmap of indices in version_info whose value was changed. */
266 /* The uses of induction variables. */
267 vec
<iv_use_p
> iv_uses
;
269 /* The candidates. */
270 vec
<iv_cand_p
> iv_candidates
;
272 /* A bitmap of important candidates. */
273 bitmap important_candidates
;
275 /* The maximum invariant id. */
278 /* Whether to consider just related and important candidates when replacing a
280 bool consider_all_candidates
;
282 /* Are we optimizing for speed? */
285 /* Whether the loop body includes any function calls. */
286 bool body_includes_call
;
288 /* Whether the loop body can only be exited via single exit. */
289 bool loop_single_exit_p
;
292 /* An assignment of iv candidates to uses. */
296 /* The number of uses covered by the assignment. */
299 /* Number of uses that cannot be expressed by the candidates in the set. */
302 /* Candidate assigned to a use, together with the related costs. */
303 struct cost_pair
**cand_for_use
;
305 /* Number of times each candidate is used. */
306 unsigned *n_cand_uses
;
308 /* The candidates used. */
311 /* The number of candidates in the set. */
314 /* Total number of registers needed. */
317 /* Total cost of expressing uses. */
318 comp_cost cand_use_cost
;
320 /* Total cost of candidates. */
323 /* Number of times each invariant is used. */
324 unsigned *n_invariant_uses
;
326 /* The array holding the number of uses of each loop
327 invariant expressions created by ivopt. */
328 unsigned *used_inv_expr
;
330 /* The number of created loop invariants. */
331 unsigned num_used_inv_expr
;
333 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
354 /* Bound on number of candidates below that all candidates are considered. */
356 #define CONSIDER_ALL_CANDIDATES_BOUND \
357 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
359 /* If there are more iv occurrences, we just give up (it is quite unlikely that
360 optimizing such a loop would help, and it would take ages). */
362 #define MAX_CONSIDERED_USES \
363 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
365 /* If there are at most this number of ivs in the set, try removing unnecessary
366 ivs from the set always. */
368 #define ALWAYS_PRUNE_CAND_SET_BOUND \
369 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
371 /* The list of trees for that the decl_rtl field must be reset is stored
374 static vec
<tree
> decl_rtl_to_reset
;
376 static comp_cost
force_expr_to_var_cost (tree
, bool);
378 /* Number of uses recorded in DATA. */
380 static inline unsigned
381 n_iv_uses (struct ivopts_data
*data
)
383 return data
->iv_uses
.length ();
386 /* Ith use recorded in DATA. */
388 static inline struct iv_use
*
389 iv_use (struct ivopts_data
*data
, unsigned i
)
391 return data
->iv_uses
[i
];
394 /* Number of candidates recorded in DATA. */
396 static inline unsigned
397 n_iv_cands (struct ivopts_data
*data
)
399 return data
->iv_candidates
.length ();
402 /* Ith candidate recorded in DATA. */
404 static inline struct iv_cand
*
405 iv_cand (struct ivopts_data
*data
, unsigned i
)
407 return data
->iv_candidates
[i
];
410 /* The single loop exit if it dominates the latch, NULL otherwise. */
413 single_dom_exit (struct loop
*loop
)
415 edge exit
= single_exit (loop
);
420 if (!just_once_each_iteration_p (loop
, exit
->src
))
426 /* Dumps information about the induction variable IV to FILE. */
428 extern void dump_iv (FILE *, struct iv
*);
430 dump_iv (FILE *file
, struct iv
*iv
)
434 fprintf (file
, "ssa name ");
435 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
436 fprintf (file
, "\n");
439 fprintf (file
, " type ");
440 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
441 fprintf (file
, "\n");
445 fprintf (file
, " base ");
446 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
447 fprintf (file
, "\n");
449 fprintf (file
, " step ");
450 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
451 fprintf (file
, "\n");
455 fprintf (file
, " invariant ");
456 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
457 fprintf (file
, "\n");
462 fprintf (file
, " base object ");
463 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
464 fprintf (file
, "\n");
468 fprintf (file
, " is a biv\n");
471 /* Dumps information about the USE to FILE. */
473 extern void dump_use (FILE *, struct iv_use
*);
475 dump_use (FILE *file
, struct iv_use
*use
)
477 fprintf (file
, "use %d\n", use
->id
);
481 case USE_NONLINEAR_EXPR
:
482 fprintf (file
, " generic\n");
486 fprintf (file
, " address\n");
490 fprintf (file
, " compare\n");
497 fprintf (file
, " in statement ");
498 print_gimple_stmt (file
, use
->stmt
, 0, 0);
499 fprintf (file
, "\n");
501 fprintf (file
, " at position ");
503 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
504 fprintf (file
, "\n");
506 dump_iv (file
, use
->iv
);
508 if (use
->related_cands
)
510 fprintf (file
, " related candidates ");
511 dump_bitmap (file
, use
->related_cands
);
515 /* Dumps information about the uses to FILE. */
517 extern void dump_uses (FILE *, struct ivopts_data
*);
519 dump_uses (FILE *file
, struct ivopts_data
*data
)
524 for (i
= 0; i
< n_iv_uses (data
); i
++)
526 use
= iv_use (data
, i
);
528 dump_use (file
, use
);
529 fprintf (file
, "\n");
533 /* Dumps information about induction variable candidate CAND to FILE. */
535 extern void dump_cand (FILE *, struct iv_cand
*);
537 dump_cand (FILE *file
, struct iv_cand
*cand
)
539 struct iv
*iv
= cand
->iv
;
541 fprintf (file
, "candidate %d%s\n",
542 cand
->id
, cand
->important
? " (important)" : "");
544 if (cand
->depends_on
)
546 fprintf (file
, " depends on ");
547 dump_bitmap (file
, cand
->depends_on
);
552 fprintf (file
, " final value replacement\n");
556 if (cand
->var_before
)
558 fprintf (file
, " var_before ");
559 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
560 fprintf (file
, "\n");
564 fprintf (file
, " var_after ");
565 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
566 fprintf (file
, "\n");
572 fprintf (file
, " incremented before exit test\n");
576 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
580 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
584 fprintf (file
, " incremented at end\n");
588 fprintf (file
, " original biv\n");
595 /* Returns the info for ssa version VER. */
597 static inline struct version_info
*
598 ver_info (struct ivopts_data
*data
, unsigned ver
)
600 return data
->version_info
+ ver
;
603 /* Returns the info for ssa name NAME. */
605 static inline struct version_info
*
606 name_info (struct ivopts_data
*data
, tree name
)
608 return ver_info (data
, SSA_NAME_VERSION (name
));
611 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
615 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
617 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
621 if (sbb
== loop
->latch
)
627 return stmt
== last_stmt (bb
);
630 /* Returns true if STMT if after the place where the original induction
631 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
632 if the positions are identical. */
635 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
637 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
638 basic_block stmt_bb
= gimple_bb (stmt
);
640 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
643 if (stmt_bb
!= cand_bb
)
647 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
649 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
652 /* Returns true if STMT if after the place where the induction variable
653 CAND is incremented in LOOP. */
656 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
664 return stmt_after_ip_normal_pos (loop
, stmt
);
668 return stmt_after_inc_pos (cand
, stmt
, false);
671 return stmt_after_inc_pos (cand
, stmt
, true);
678 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
681 abnormal_ssa_name_p (tree exp
)
686 if (TREE_CODE (exp
) != SSA_NAME
)
689 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
692 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
693 abnormal phi node. Callback for for_each_index. */
696 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
697 void *data ATTRIBUTE_UNUSED
)
699 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
701 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
703 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
707 return !abnormal_ssa_name_p (*index
);
710 /* Returns true if EXPR contains a ssa name that occurs in an
711 abnormal phi node. */
714 contains_abnormal_ssa_name_p (tree expr
)
717 enum tree_code_class codeclass
;
722 code
= TREE_CODE (expr
);
723 codeclass
= TREE_CODE_CLASS (code
);
725 if (code
== SSA_NAME
)
726 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
728 if (code
== INTEGER_CST
729 || is_gimple_min_invariant (expr
))
732 if (code
== ADDR_EXPR
)
733 return !for_each_index (&TREE_OPERAND (expr
, 0),
734 idx_contains_abnormal_ssa_name_p
,
737 if (code
== COND_EXPR
)
738 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
739 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
740 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
746 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
751 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
763 /* Returns the structure describing number of iterations determined from
764 EXIT of DATA->current_loop, or NULL if something goes wrong. */
766 static struct tree_niter_desc
*
767 niter_for_exit (struct ivopts_data
*data
, edge exit
)
769 struct tree_niter_desc
*desc
;
774 data
->niters
= pointer_map_create ();
778 slot
= pointer_map_contains (data
->niters
, exit
);
782 /* Try to determine number of iterations. We cannot safely work with ssa
783 names that appear in phi nodes on abnormal edges, so that we do not
784 create overlapping life ranges for them (PR 27283). */
785 desc
= XNEW (struct tree_niter_desc
);
786 if (!number_of_iterations_exit (data
->current_loop
,
788 || contains_abnormal_ssa_name_p (desc
->niter
))
793 slot
= pointer_map_insert (data
->niters
, exit
);
797 desc
= (struct tree_niter_desc
*) *slot
;
802 /* Returns the structure describing number of iterations determined from
803 single dominating exit of DATA->current_loop, or NULL if something
806 static struct tree_niter_desc
*
807 niter_for_single_dom_exit (struct ivopts_data
*data
)
809 edge exit
= single_dom_exit (data
->current_loop
);
814 return niter_for_exit (data
, exit
);
817 /* Hash table equality function for expressions. */
820 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
822 const struct iv_inv_expr_ent
*expr1
=
823 (const struct iv_inv_expr_ent
*)ent1
;
824 const struct iv_inv_expr_ent
*expr2
=
825 (const struct iv_inv_expr_ent
*)ent2
;
827 return expr1
->hash
== expr2
->hash
828 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
831 /* Hash function for loop invariant expressions. */
834 htab_inv_expr_hash (const void *ent
)
836 const struct iv_inv_expr_ent
*expr
=
837 (const struct iv_inv_expr_ent
*)ent
;
841 /* Initializes data structures used by the iv optimization pass, stored
845 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
847 data
->version_info_size
= 2 * num_ssa_names
;
848 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
849 data
->relevant
= BITMAP_ALLOC (NULL
);
850 data
->important_candidates
= BITMAP_ALLOC (NULL
);
851 data
->max_inv_id
= 0;
853 data
->iv_uses
.create (20);
854 data
->iv_candidates
.create (20);
855 data
->inv_expr_tab
= htab_create (10, htab_inv_expr_hash
,
856 htab_inv_expr_eq
, free
);
857 data
->inv_expr_id
= 0;
858 decl_rtl_to_reset
.create (20);
861 /* Returns a memory object to that EXPR points. In case we are able to
862 determine that it does not point to any such object, NULL is returned. */
865 determine_base_object (tree expr
)
867 enum tree_code code
= TREE_CODE (expr
);
870 /* If this is a pointer casted to any type, we need to determine
871 the base object for the pointer; so handle conversions before
872 throwing away non-pointer expressions. */
873 if (CONVERT_EXPR_P (expr
))
874 return determine_base_object (TREE_OPERAND (expr
, 0));
876 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
885 obj
= TREE_OPERAND (expr
, 0);
886 base
= get_base_address (obj
);
891 if (TREE_CODE (base
) == MEM_REF
)
892 return determine_base_object (TREE_OPERAND (base
, 0));
894 return fold_convert (ptr_type_node
,
895 build_fold_addr_expr (base
));
897 case POINTER_PLUS_EXPR
:
898 return determine_base_object (TREE_OPERAND (expr
, 0));
902 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
906 return fold_convert (ptr_type_node
, expr
);
910 /* Allocates an induction variable with given initial value BASE and step STEP
914 alloc_iv (tree base
, tree step
)
916 struct iv
*iv
= XCNEW (struct iv
);
917 gcc_assert (step
!= NULL_TREE
);
920 iv
->base_object
= determine_base_object (base
);
923 iv
->have_use_for
= false;
925 iv
->ssa_name
= NULL_TREE
;
930 /* Sets STEP and BASE for induction variable IV. */
933 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
935 struct version_info
*info
= name_info (data
, iv
);
937 gcc_assert (!info
->iv
);
939 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
940 info
->iv
= alloc_iv (base
, step
);
941 info
->iv
->ssa_name
= iv
;
944 /* Finds induction variable declaration for VAR. */
947 get_iv (struct ivopts_data
*data
, tree var
)
950 tree type
= TREE_TYPE (var
);
952 if (!POINTER_TYPE_P (type
)
953 && !INTEGRAL_TYPE_P (type
))
956 if (!name_info (data
, var
)->iv
)
958 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
961 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
962 set_iv (data
, var
, var
, build_int_cst (type
, 0));
965 return name_info (data
, var
)->iv
;
968 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
969 not define a simple affine biv with nonzero step. */
972 determine_biv_step (gimple phi
)
974 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
975 tree name
= PHI_RESULT (phi
);
978 if (virtual_operand_p (name
))
981 if (!simple_iv (loop
, loop
, name
, &iv
, true))
984 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
987 /* Finds basic ivs. */
990 find_bivs (struct ivopts_data
*data
)
993 tree step
, type
, base
;
995 struct loop
*loop
= data
->current_loop
;
996 gimple_stmt_iterator psi
;
998 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1000 phi
= gsi_stmt (psi
);
1002 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1005 step
= determine_biv_step (phi
);
1009 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1010 base
= expand_simple_operations (base
);
1011 if (contains_abnormal_ssa_name_p (base
)
1012 || contains_abnormal_ssa_name_p (step
))
1015 type
= TREE_TYPE (PHI_RESULT (phi
));
1016 base
= fold_convert (type
, base
);
1019 if (POINTER_TYPE_P (type
))
1020 step
= convert_to_ptrofftype (step
);
1022 step
= fold_convert (type
, step
);
1025 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1032 /* Marks basic ivs. */
1035 mark_bivs (struct ivopts_data
*data
)
1039 struct iv
*iv
, *incr_iv
;
1040 struct loop
*loop
= data
->current_loop
;
1041 basic_block incr_bb
;
1042 gimple_stmt_iterator psi
;
1044 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1046 phi
= gsi_stmt (psi
);
1048 iv
= get_iv (data
, PHI_RESULT (phi
));
1052 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1053 incr_iv
= get_iv (data
, var
);
1057 /* If the increment is in the subloop, ignore it. */
1058 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1059 if (incr_bb
->loop_father
!= data
->current_loop
1060 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1064 incr_iv
->biv_p
= true;
1068 /* Checks whether STMT defines a linear induction variable and stores its
1069 parameters to IV. */
1072 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1075 struct loop
*loop
= data
->current_loop
;
1077 iv
->base
= NULL_TREE
;
1078 iv
->step
= NULL_TREE
;
1080 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1083 lhs
= gimple_assign_lhs (stmt
);
1084 if (TREE_CODE (lhs
) != SSA_NAME
)
1087 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1089 iv
->base
= expand_simple_operations (iv
->base
);
1091 if (contains_abnormal_ssa_name_p (iv
->base
)
1092 || contains_abnormal_ssa_name_p (iv
->step
))
1095 /* If STMT could throw, then do not consider STMT as defining a GIV.
1096 While this will suppress optimizations, we can not safely delete this
1097 GIV and associated statements, even if it appears it is not used. */
1098 if (stmt_could_throw_p (stmt
))
1104 /* Finds general ivs in statement STMT. */
1107 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1111 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1114 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1117 /* Finds general ivs in basic block BB. */
1120 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1122 gimple_stmt_iterator bsi
;
1124 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1125 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1128 /* Finds general ivs. */
1131 find_givs (struct ivopts_data
*data
)
1133 struct loop
*loop
= data
->current_loop
;
1134 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1137 for (i
= 0; i
< loop
->num_nodes
; i
++)
1138 find_givs_in_bb (data
, body
[i
]);
1142 /* For each ssa name defined in LOOP determines whether it is an induction
1143 variable and if so, its initial value and step. */
1146 find_induction_variables (struct ivopts_data
*data
)
1151 if (!find_bivs (data
))
1157 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1159 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1163 fprintf (dump_file
, " number of iterations ");
1164 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1165 if (!integer_zerop (niter
->may_be_zero
))
1167 fprintf (dump_file
, "; zero if ");
1168 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1170 fprintf (dump_file
, "\n\n");
1173 fprintf (dump_file
, "Induction variables:\n\n");
1175 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1177 if (ver_info (data
, i
)->iv
)
1178 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1185 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1187 static struct iv_use
*
1188 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1189 gimple stmt
, enum use_type use_type
)
1191 struct iv_use
*use
= XCNEW (struct iv_use
);
1193 use
->id
= n_iv_uses (data
);
1194 use
->type
= use_type
;
1198 use
->related_cands
= BITMAP_ALLOC (NULL
);
1200 /* To avoid showing ssa name in the dumps, if it was not reset by the
1202 iv
->ssa_name
= NULL_TREE
;
1204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1205 dump_use (dump_file
, use
);
1207 data
->iv_uses
.safe_push (use
);
1212 /* Checks whether OP is a loop-level invariant and if so, records it.
1213 NONLINEAR_USE is true if the invariant is used in a way we do not
1214 handle specially. */
1217 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1220 struct version_info
*info
;
1222 if (TREE_CODE (op
) != SSA_NAME
1223 || virtual_operand_p (op
))
1226 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1228 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1231 info
= name_info (data
, op
);
1233 info
->has_nonlin_use
|= nonlinear_use
;
1235 info
->inv_id
= ++data
->max_inv_id
;
1236 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1239 /* Checks whether the use OP is interesting and if so, records it. */
1241 static struct iv_use
*
1242 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1249 if (TREE_CODE (op
) != SSA_NAME
)
1252 iv
= get_iv (data
, op
);
1256 if (iv
->have_use_for
)
1258 use
= iv_use (data
, iv
->use_id
);
1260 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1264 if (integer_zerop (iv
->step
))
1266 record_invariant (data
, op
, true);
1269 iv
->have_use_for
= true;
1271 civ
= XNEW (struct iv
);
1274 stmt
= SSA_NAME_DEF_STMT (op
);
1275 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1276 || is_gimple_assign (stmt
));
1278 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1279 iv
->use_id
= use
->id
;
1284 /* Given a condition in statement STMT, checks whether it is a compare
1285 of an induction variable and an invariant. If this is the case,
1286 CONTROL_VAR is set to location of the iv, BOUND to the location of
1287 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1288 induction variable descriptions, and true is returned. If this is not
1289 the case, CONTROL_VAR and BOUND are set to the arguments of the
1290 condition and false is returned. */
1293 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1294 tree
**control_var
, tree
**bound
,
1295 struct iv
**iv_var
, struct iv
**iv_bound
)
1297 /* The objects returned when COND has constant operands. */
1298 static struct iv const_iv
;
1300 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1301 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1304 if (gimple_code (stmt
) == GIMPLE_COND
)
1306 op0
= gimple_cond_lhs_ptr (stmt
);
1307 op1
= gimple_cond_rhs_ptr (stmt
);
1311 op0
= gimple_assign_rhs1_ptr (stmt
);
1312 op1
= gimple_assign_rhs2_ptr (stmt
);
1315 zero
= integer_zero_node
;
1316 const_iv
.step
= integer_zero_node
;
1318 if (TREE_CODE (*op0
) == SSA_NAME
)
1319 iv0
= get_iv (data
, *op0
);
1320 if (TREE_CODE (*op1
) == SSA_NAME
)
1321 iv1
= get_iv (data
, *op1
);
1323 /* Exactly one of the compared values must be an iv, and the other one must
1328 if (integer_zerop (iv0
->step
))
1330 /* Control variable may be on the other side. */
1331 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1332 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1334 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1338 *control_var
= op0
;;
1349 /* Checks whether the condition in STMT is interesting and if so,
1353 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1355 tree
*var_p
, *bound_p
;
1356 struct iv
*var_iv
, *civ
;
1358 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1360 find_interesting_uses_op (data
, *var_p
);
1361 find_interesting_uses_op (data
, *bound_p
);
1365 civ
= XNEW (struct iv
);
1367 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1370 /* Returns true if expression EXPR is obviously invariant in LOOP,
1371 i.e. if all its operands are defined outside of the LOOP. LOOP
1372 should not be the function body. */
1375 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1380 gcc_assert (loop_depth (loop
) > 0);
1382 if (is_gimple_min_invariant (expr
))
1385 if (TREE_CODE (expr
) == SSA_NAME
)
1387 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1389 && flow_bb_inside_loop_p (loop
, def_bb
))
1398 len
= TREE_OPERAND_LENGTH (expr
);
1399 for (i
= 0; i
< len
; i
++)
1400 if (TREE_OPERAND (expr
, i
)
1401 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1407 /* Returns true if statement STMT is obviously invariant in LOOP,
1408 i.e. if all its operands on the RHS are defined outside of the LOOP.
1409 LOOP should not be the function body. */
1412 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1417 gcc_assert (loop_depth (loop
) > 0);
1419 lhs
= gimple_get_lhs (stmt
);
1420 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1422 tree op
= gimple_op (stmt
, i
);
1423 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1430 /* Cumulates the steps of indices into DATA and replaces their values with the
1431 initial ones. Returns false when the value of the index cannot be determined.
1432 Callback for for_each_index. */
1434 struct ifs_ivopts_data
1436 struct ivopts_data
*ivopts_data
;
1442 idx_find_step (tree base
, tree
*idx
, void *data
)
1444 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1446 tree step
, iv_base
, iv_step
, lbound
, off
;
1447 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1449 /* If base is a component ref, require that the offset of the reference
1451 if (TREE_CODE (base
) == COMPONENT_REF
)
1453 off
= component_ref_field_offset (base
);
1454 return expr_invariant_in_loop_p (loop
, off
);
1457 /* If base is array, first check whether we will be able to move the
1458 reference out of the loop (in order to take its address in strength
1459 reduction). In order for this to work we need both lower bound
1460 and step to be loop invariants. */
1461 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1463 /* Moreover, for a range, the size needs to be invariant as well. */
1464 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1465 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1468 step
= array_ref_element_size (base
);
1469 lbound
= array_ref_low_bound (base
);
1471 if (!expr_invariant_in_loop_p (loop
, step
)
1472 || !expr_invariant_in_loop_p (loop
, lbound
))
1476 if (TREE_CODE (*idx
) != SSA_NAME
)
1479 iv
= get_iv (dta
->ivopts_data
, *idx
);
1483 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1484 *&x[0], which is not folded and does not trigger the
1485 ARRAY_REF path below. */
1488 if (integer_zerop (iv
->step
))
1491 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1493 step
= array_ref_element_size (base
);
1495 /* We only handle addresses whose step is an integer constant. */
1496 if (TREE_CODE (step
) != INTEGER_CST
)
1500 /* The step for pointer arithmetics already is 1 byte. */
1501 step
= size_one_node
;
1505 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1506 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1509 /* The index might wrap. */
1513 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1514 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1519 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1520 object is passed to it in DATA. */
1523 idx_record_use (tree base
, tree
*idx
,
1526 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1527 find_interesting_uses_op (data
, *idx
);
1528 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1530 find_interesting_uses_op (data
, array_ref_element_size (base
));
1531 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537 store cst to MUL and return true. Otherwise return false.
1538 The returned value is always sign-extended, regardless of the
1539 signedness of TOP and BOT. */
1542 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1545 enum tree_code code
;
1546 double_int res
, p0
, p1
;
1547 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1552 if (operand_equal_p (top
, bot
, 0))
1554 *mul
= double_int_one
;
1558 code
= TREE_CODE (top
);
1562 mby
= TREE_OPERAND (top
, 1);
1563 if (TREE_CODE (mby
) != INTEGER_CST
)
1566 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1569 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1574 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1575 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1578 if (code
== MINUS_EXPR
)
1580 *mul
= (p0
+ p1
).sext (precision
);
1584 if (TREE_CODE (bot
) != INTEGER_CST
)
1587 p0
= tree_to_double_int (top
).sext (precision
);
1588 p1
= tree_to_double_int (bot
).sext (precision
);
1591 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1592 return res
.is_zero ();
1599 /* Returns true if memory reference REF with step STEP may be unaligned. */
1602 may_be_unaligned_p (tree ref
, tree step
)
1606 HOST_WIDE_INT bitsize
;
1607 HOST_WIDE_INT bitpos
;
1609 enum machine_mode mode
;
1610 int unsignedp
, volatilep
;
1611 unsigned base_align
;
1613 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1614 thus they are not misaligned. */
1615 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1618 /* The test below is basically copy of what expr.c:normal_inner_ref
1619 does to check whether the object must be loaded by parts when
1620 STRICT_ALIGNMENT is true. */
1621 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1622 &unsignedp
, &volatilep
, true);
1623 base_type
= TREE_TYPE (base
);
1624 base_align
= get_object_alignment (base
);
1625 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1627 if (mode
!= BLKmode
)
1629 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1631 if (base_align
< mode_align
1632 || (bitpos
% mode_align
) != 0
1633 || (bitpos
% BITS_PER_UNIT
) != 0)
1637 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1640 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1647 /* Return true if EXPR may be non-addressable. */
1650 may_be_nonaddressable_p (tree expr
)
1652 switch (TREE_CODE (expr
))
1654 case TARGET_MEM_REF
:
1655 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1656 target, thus they are always addressable. */
1660 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1661 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1663 case VIEW_CONVERT_EXPR
:
1664 /* This kind of view-conversions may wrap non-addressable objects
1665 and make them look addressable. After some processing the
1666 non-addressability may be uncovered again, causing ADDR_EXPRs
1667 of inappropriate objects to be built. */
1668 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1669 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1672 /* ... fall through ... */
1675 case ARRAY_RANGE_REF
:
1676 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1688 /* Finds addresses in *OP_P inside STMT. */
1691 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1693 tree base
= *op_p
, step
= size_zero_node
;
1695 struct ifs_ivopts_data ifs_ivopts_data
;
1697 /* Do not play with volatile memory references. A bit too conservative,
1698 perhaps, but safe. */
1699 if (gimple_has_volatile_ops (stmt
))
1702 /* Ignore bitfields for now. Not really something terribly complicated
1704 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1707 base
= unshare_expr (base
);
1709 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1711 tree type
= build_pointer_type (TREE_TYPE (base
));
1715 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1717 civ
= get_iv (data
, TMR_BASE (base
));
1721 TMR_BASE (base
) = civ
->base
;
1724 if (TMR_INDEX2 (base
)
1725 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1727 civ
= get_iv (data
, TMR_INDEX2 (base
));
1731 TMR_INDEX2 (base
) = civ
->base
;
1734 if (TMR_INDEX (base
)
1735 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1737 civ
= get_iv (data
, TMR_INDEX (base
));
1741 TMR_INDEX (base
) = civ
->base
;
1746 if (TMR_STEP (base
))
1747 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1749 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1753 if (integer_zerop (step
))
1755 base
= tree_mem_ref_addr (type
, base
);
1759 ifs_ivopts_data
.ivopts_data
= data
;
1760 ifs_ivopts_data
.stmt
= stmt
;
1761 ifs_ivopts_data
.step
= size_zero_node
;
1762 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1763 || integer_zerop (ifs_ivopts_data
.step
))
1765 step
= ifs_ivopts_data
.step
;
1767 /* Check that the base expression is addressable. This needs
1768 to be done after substituting bases of IVs into it. */
1769 if (may_be_nonaddressable_p (base
))
1772 /* Moreover, on strict alignment platforms, check that it is
1773 sufficiently aligned. */
1774 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1777 base
= build_fold_addr_expr (base
);
1779 /* Substituting bases of IVs into the base expression might
1780 have caused folding opportunities. */
1781 if (TREE_CODE (base
) == ADDR_EXPR
)
1783 tree
*ref
= &TREE_OPERAND (base
, 0);
1784 while (handled_component_p (*ref
))
1785 ref
= &TREE_OPERAND (*ref
, 0);
1786 if (TREE_CODE (*ref
) == MEM_REF
)
1788 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1789 TREE_OPERAND (*ref
, 0),
1790 TREE_OPERAND (*ref
, 1));
1797 civ
= alloc_iv (base
, step
);
1798 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1802 for_each_index (op_p
, idx_record_use
, data
);
1805 /* Finds and records invariants used in STMT. */
1808 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1811 use_operand_p use_p
;
1814 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1816 op
= USE_FROM_PTR (use_p
);
1817 record_invariant (data
, op
, false);
1821 /* Finds interesting uses of induction variables in the statement STMT. */
1824 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1827 tree op
, *lhs
, *rhs
;
1829 use_operand_p use_p
;
1830 enum tree_code code
;
1832 find_invariants_stmt (data
, stmt
);
1834 if (gimple_code (stmt
) == GIMPLE_COND
)
1836 find_interesting_uses_cond (data
, stmt
);
1840 if (is_gimple_assign (stmt
))
1842 lhs
= gimple_assign_lhs_ptr (stmt
);
1843 rhs
= gimple_assign_rhs1_ptr (stmt
);
1845 if (TREE_CODE (*lhs
) == SSA_NAME
)
1847 /* If the statement defines an induction variable, the uses are not
1848 interesting by themselves. */
1850 iv
= get_iv (data
, *lhs
);
1852 if (iv
&& !integer_zerop (iv
->step
))
1856 code
= gimple_assign_rhs_code (stmt
);
1857 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1858 && (REFERENCE_CLASS_P (*rhs
)
1859 || is_gimple_val (*rhs
)))
1861 if (REFERENCE_CLASS_P (*rhs
))
1862 find_interesting_uses_address (data
, stmt
, rhs
);
1864 find_interesting_uses_op (data
, *rhs
);
1866 if (REFERENCE_CLASS_P (*lhs
))
1867 find_interesting_uses_address (data
, stmt
, lhs
);
1870 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1872 find_interesting_uses_cond (data
, stmt
);
1876 /* TODO -- we should also handle address uses of type
1878 memory = call (whatever);
1885 if (gimple_code (stmt
) == GIMPLE_PHI
1886 && gimple_bb (stmt
) == data
->current_loop
->header
)
1888 iv
= get_iv (data
, PHI_RESULT (stmt
));
1890 if (iv
&& !integer_zerop (iv
->step
))
1894 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1896 op
= USE_FROM_PTR (use_p
);
1898 if (TREE_CODE (op
) != SSA_NAME
)
1901 iv
= get_iv (data
, op
);
1905 find_interesting_uses_op (data
, op
);
1909 /* Finds interesting uses of induction variables outside of loops
1910 on loop exit edge EXIT. */
1913 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1916 gimple_stmt_iterator psi
;
1919 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1921 phi
= gsi_stmt (psi
);
1922 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1923 if (!virtual_operand_p (def
))
1924 find_interesting_uses_op (data
, def
);
1928 /* Finds uses of the induction variables that are interesting. */
1931 find_interesting_uses (struct ivopts_data
*data
)
1934 gimple_stmt_iterator bsi
;
1935 basic_block
*body
= get_loop_body (data
->current_loop
);
1937 struct version_info
*info
;
1940 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1941 fprintf (dump_file
, "Uses:\n\n");
1943 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1948 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1949 if (e
->dest
!= EXIT_BLOCK_PTR
1950 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1951 find_interesting_uses_outside (data
, e
);
1953 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1954 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1955 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1956 if (!is_gimple_debug (gsi_stmt (bsi
)))
1957 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1960 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1964 fprintf (dump_file
, "\n");
1966 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1968 info
= ver_info (data
, i
);
1971 fprintf (dump_file
, " ");
1972 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1973 fprintf (dump_file
, " is invariant (%d)%s\n",
1974 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1978 fprintf (dump_file
, "\n");
1984 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1985 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1986 we are at the top-level of the processed address. */
1989 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1990 unsigned HOST_WIDE_INT
*offset
)
1992 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1993 enum tree_code code
;
1994 tree type
, orig_type
= TREE_TYPE (expr
);
1995 unsigned HOST_WIDE_INT off0
, off1
, st
;
1996 tree orig_expr
= expr
;
2000 type
= TREE_TYPE (expr
);
2001 code
= TREE_CODE (expr
);
2007 if (!cst_and_fits_in_hwi (expr
)
2008 || integer_zerop (expr
))
2011 *offset
= int_cst_value (expr
);
2012 return build_int_cst (orig_type
, 0);
2014 case POINTER_PLUS_EXPR
:
2017 op0
= TREE_OPERAND (expr
, 0);
2018 op1
= TREE_OPERAND (expr
, 1);
2020 op0
= strip_offset_1 (op0
, false, false, &off0
);
2021 op1
= strip_offset_1 (op1
, false, false, &off1
);
2023 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2024 if (op0
== TREE_OPERAND (expr
, 0)
2025 && op1
== TREE_OPERAND (expr
, 1))
2028 if (integer_zerop (op1
))
2030 else if (integer_zerop (op0
))
2032 if (code
== MINUS_EXPR
)
2033 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2038 expr
= fold_build2 (code
, type
, op0
, op1
);
2040 return fold_convert (orig_type
, expr
);
2043 op1
= TREE_OPERAND (expr
, 1);
2044 if (!cst_and_fits_in_hwi (op1
))
2047 op0
= TREE_OPERAND (expr
, 0);
2048 op0
= strip_offset_1 (op0
, false, false, &off0
);
2049 if (op0
== TREE_OPERAND (expr
, 0))
2052 *offset
= off0
* int_cst_value (op1
);
2053 if (integer_zerop (op0
))
2056 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2058 return fold_convert (orig_type
, expr
);
2061 case ARRAY_RANGE_REF
:
2065 step
= array_ref_element_size (expr
);
2066 if (!cst_and_fits_in_hwi (step
))
2069 st
= int_cst_value (step
);
2070 op1
= TREE_OPERAND (expr
, 1);
2071 op1
= strip_offset_1 (op1
, false, false, &off1
);
2072 *offset
= off1
* st
;
2075 && integer_zerop (op1
))
2077 /* Strip the component reference completely. */
2078 op0
= TREE_OPERAND (expr
, 0);
2079 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2089 tmp
= component_ref_field_offset (expr
);
2091 && cst_and_fits_in_hwi (tmp
))
2093 /* Strip the component reference completely. */
2094 op0
= TREE_OPERAND (expr
, 0);
2095 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2096 *offset
= off0
+ int_cst_value (tmp
);
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, true, true, &off0
);
2106 if (op0
== TREE_OPERAND (expr
, 0))
2109 expr
= build_fold_addr_expr (op0
);
2110 return fold_convert (orig_type
, expr
);
2113 /* ??? Offset operand? */
2114 inside_addr
= false;
2121 /* Default handling of expressions for that we want to recurse into
2122 the first operand. */
2123 op0
= TREE_OPERAND (expr
, 0);
2124 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2127 if (op0
== TREE_OPERAND (expr
, 0)
2128 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2131 expr
= copy_node (expr
);
2132 TREE_OPERAND (expr
, 0) = op0
;
2134 TREE_OPERAND (expr
, 1) = op1
;
2136 /* Inside address, we might strip the top level component references,
2137 thus changing type of the expression. Handling of ADDR_EXPR
2139 expr
= fold_convert (orig_type
, expr
);
2144 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2147 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2149 return strip_offset_1 (expr
, false, false, offset
);
2152 /* Returns variant of TYPE that can be used as base for different uses.
2153 We return unsigned type with the same precision, which avoids problems
2157 generic_type_for (tree type
)
2159 if (POINTER_TYPE_P (type
))
2160 return unsigned_type_for (type
);
2162 if (TYPE_UNSIGNED (type
))
2165 return unsigned_type_for (type
);
2168 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2169 the bitmap to that we should store it. */
2171 static struct ivopts_data
*fd_ivopts_data
;
2173 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2175 bitmap
*depends_on
= (bitmap
*) data
;
2176 struct version_info
*info
;
2178 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2180 info
= name_info (fd_ivopts_data
, *expr_p
);
2182 if (!info
->inv_id
|| info
->has_nonlin_use
)
2186 *depends_on
= BITMAP_ALLOC (NULL
);
2187 bitmap_set_bit (*depends_on
, info
->inv_id
);
2192 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2193 position to POS. If USE is not NULL, the candidate is set as related to
2194 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2195 replacement of the final value of the iv by a direct computation. */
2197 static struct iv_cand
*
2198 add_candidate_1 (struct ivopts_data
*data
,
2199 tree base
, tree step
, bool important
, enum iv_position pos
,
2200 struct iv_use
*use
, gimple incremented_at
)
2203 struct iv_cand
*cand
= NULL
;
2204 tree type
, orig_type
;
2206 /* For non-original variables, make sure their values are computed in a type
2207 that does not invoke undefined behavior on overflows (since in general,
2208 we cannot prove that these induction variables are non-wrapping). */
2209 if (pos
!= IP_ORIGINAL
)
2211 orig_type
= TREE_TYPE (base
);
2212 type
= generic_type_for (orig_type
);
2213 if (type
!= orig_type
)
2215 base
= fold_convert (type
, base
);
2216 step
= fold_convert (type
, step
);
2220 for (i
= 0; i
< n_iv_cands (data
); i
++)
2222 cand
= iv_cand (data
, i
);
2224 if (cand
->pos
!= pos
)
2227 if (cand
->incremented_at
!= incremented_at
2228 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2229 && cand
->ainc_use
!= use
))
2243 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2244 && operand_equal_p (step
, cand
->iv
->step
, 0)
2245 && (TYPE_PRECISION (TREE_TYPE (base
))
2246 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2250 if (i
== n_iv_cands (data
))
2252 cand
= XCNEW (struct iv_cand
);
2258 cand
->iv
= alloc_iv (base
, step
);
2261 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2263 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2264 cand
->var_after
= cand
->var_before
;
2266 cand
->important
= important
;
2267 cand
->incremented_at
= incremented_at
;
2268 data
->iv_candidates
.safe_push (cand
);
2271 && TREE_CODE (step
) != INTEGER_CST
)
2273 fd_ivopts_data
= data
;
2274 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2277 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2278 cand
->ainc_use
= use
;
2280 cand
->ainc_use
= NULL
;
2282 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2283 dump_cand (dump_file
, cand
);
2286 if (important
&& !cand
->important
)
2288 cand
->important
= true;
2289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2290 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2295 bitmap_set_bit (use
->related_cands
, i
);
2296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2297 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2304 /* Returns true if incrementing the induction variable at the end of the LOOP
2307 The purpose is to avoid splitting latch edge with a biv increment, thus
2308 creating a jump, possibly confusing other optimization passes and leaving
2309 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2310 is not available (so we do not have a better alternative), or if the latch
2311 edge is already nonempty. */
2314 allow_ip_end_pos_p (struct loop
*loop
)
2316 if (!ip_normal_pos (loop
))
2319 if (!empty_block_p (ip_end_pos (loop
)))
2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2326 Important field is set to IMPORTANT. */
2329 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2330 bool important
, struct iv_use
*use
)
2332 basic_block use_bb
= gimple_bb (use
->stmt
);
2333 enum machine_mode mem_mode
;
2334 unsigned HOST_WIDE_INT cstepi
;
2336 /* If we insert the increment in any position other than the standard
2337 ones, we must ensure that it is incremented once per iteration.
2338 It must not be in an inner nested loop, or one side of an if
2340 if (use_bb
->loop_father
!= data
->current_loop
2341 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2342 || stmt_could_throw_p (use
->stmt
)
2343 || !cst_and_fits_in_hwi (step
))
2346 cstepi
= int_cst_value (step
);
2348 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2349 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2350 || USE_STORE_PRE_INCREMENT (mem_mode
))
2351 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2352 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2353 || USE_STORE_PRE_DECREMENT (mem_mode
))
2354 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2356 enum tree_code code
= MINUS_EXPR
;
2358 tree new_step
= step
;
2360 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2362 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2363 code
= POINTER_PLUS_EXPR
;
2366 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2367 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2368 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2371 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2372 || USE_STORE_POST_INCREMENT (mem_mode
))
2373 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2374 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2375 || USE_STORE_POST_DECREMENT (mem_mode
))
2376 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2378 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2383 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2384 position to POS. If USE is not NULL, the candidate is set as related to
2385 it. The candidate computation is scheduled on all available positions. */
2388 add_candidate (struct ivopts_data
*data
,
2389 tree base
, tree step
, bool important
, struct iv_use
*use
)
2391 if (ip_normal_pos (data
->current_loop
))
2392 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2393 if (ip_end_pos (data
->current_loop
)
2394 && allow_ip_end_pos_p (data
->current_loop
))
2395 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2397 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2398 add_autoinc_candidates (data
, base
, step
, important
, use
);
2401 /* Adds standard iv candidates. */
2404 add_standard_iv_candidates (struct ivopts_data
*data
)
2406 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2408 /* The same for a double-integer type if it is still fast enough. */
2410 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2411 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2412 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2413 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2415 /* The same for a double-integer type if it is still fast enough. */
2417 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2418 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2419 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2420 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2424 /* Adds candidates bases on the old induction variable IV. */
2427 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2431 struct iv_cand
*cand
;
2433 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2435 /* The same, but with initial value zero. */
2436 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2437 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2439 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2440 iv
->step
, true, NULL
);
2442 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2443 if (gimple_code (phi
) == GIMPLE_PHI
)
2445 /* Additionally record the possibility of leaving the original iv
2447 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2448 cand
= add_candidate_1 (data
,
2449 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2450 SSA_NAME_DEF_STMT (def
));
2451 cand
->var_before
= iv
->ssa_name
;
2452 cand
->var_after
= def
;
2456 /* Adds candidates based on the old induction variables. */
2459 add_old_ivs_candidates (struct ivopts_data
*data
)
2465 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2467 iv
= ver_info (data
, i
)->iv
;
2468 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2469 add_old_iv_candidates (data
, iv
);
2473 /* Adds candidates based on the value of the induction variable IV and USE. */
2476 add_iv_value_candidates (struct ivopts_data
*data
,
2477 struct iv
*iv
, struct iv_use
*use
)
2479 unsigned HOST_WIDE_INT offset
;
2483 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2485 /* The same, but with initial value zero. Make such variable important,
2486 since it is generic enough so that possibly many uses may be based
2488 basetype
= TREE_TYPE (iv
->base
);
2489 if (POINTER_TYPE_P (basetype
))
2490 basetype
= sizetype
;
2491 add_candidate (data
, build_int_cst (basetype
, 0),
2492 iv
->step
, true, use
);
2494 /* Third, try removing the constant offset. Make sure to even
2495 add a candidate for &a[0] vs. (T *)&a. */
2496 base
= strip_offset (iv
->base
, &offset
);
2498 || base
!= iv
->base
)
2499 add_candidate (data
, base
, iv
->step
, false, use
);
2502 /* Adds candidates based on the uses. */
2505 add_derived_ivs_candidates (struct ivopts_data
*data
)
2509 for (i
= 0; i
< n_iv_uses (data
); i
++)
2511 struct iv_use
*use
= iv_use (data
, i
);
2518 case USE_NONLINEAR_EXPR
:
2521 /* Just add the ivs based on the value of the iv used here. */
2522 add_iv_value_candidates (data
, use
->iv
, use
);
2531 /* Record important candidates and add them to related_cands bitmaps
2535 record_important_candidates (struct ivopts_data
*data
)
2540 for (i
= 0; i
< n_iv_cands (data
); i
++)
2542 struct iv_cand
*cand
= iv_cand (data
, i
);
2544 if (cand
->important
)
2545 bitmap_set_bit (data
->important_candidates
, i
);
2548 data
->consider_all_candidates
= (n_iv_cands (data
)
2549 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2551 if (data
->consider_all_candidates
)
2553 /* We will not need "related_cands" bitmaps in this case,
2554 so release them to decrease peak memory consumption. */
2555 for (i
= 0; i
< n_iv_uses (data
); i
++)
2557 use
= iv_use (data
, i
);
2558 BITMAP_FREE (use
->related_cands
);
2563 /* Add important candidates to the related_cands bitmaps. */
2564 for (i
= 0; i
< n_iv_uses (data
); i
++)
2565 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2566 data
->important_candidates
);
2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2571 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2572 we allocate a simple list to every use. */
2575 alloc_use_cost_map (struct ivopts_data
*data
)
2577 unsigned i
, size
, s
;
2579 for (i
= 0; i
< n_iv_uses (data
); i
++)
2581 struct iv_use
*use
= iv_use (data
, i
);
2583 if (data
->consider_all_candidates
)
2584 size
= n_iv_cands (data
);
2587 s
= bitmap_count_bits (use
->related_cands
);
2589 /* Round up to the power of two, so that moduling by it is fast. */
2590 size
= s
? (1 << ceil_log2 (s
)) : 1;
2593 use
->n_map_members
= size
;
2594 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2598 /* Returns description of computation cost of expression whose runtime
2599 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2602 new_cost (unsigned runtime
, unsigned complexity
)
2606 cost
.cost
= runtime
;
2607 cost
.complexity
= complexity
;
2612 /* Adds costs COST1 and COST2. */
2615 add_costs (comp_cost cost1
, comp_cost cost2
)
2617 cost1
.cost
+= cost2
.cost
;
2618 cost1
.complexity
+= cost2
.complexity
;
2622 /* Subtracts costs COST1 and COST2. */
2625 sub_costs (comp_cost cost1
, comp_cost cost2
)
2627 cost1
.cost
-= cost2
.cost
;
2628 cost1
.complexity
-= cost2
.complexity
;
2633 /* Returns a negative number if COST1 < COST2, a positive number if
2634 COST1 > COST2, and 0 if COST1 = COST2. */
2637 compare_costs (comp_cost cost1
, comp_cost cost2
)
2639 if (cost1
.cost
== cost2
.cost
)
2640 return cost1
.complexity
- cost2
.complexity
;
2642 return cost1
.cost
- cost2
.cost
;
2645 /* Returns true if COST is infinite. */
2648 infinite_cost_p (comp_cost cost
)
2650 return cost
.cost
== INFTY
;
2653 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2654 on invariants DEPENDS_ON and that the value used in expressing it
2655 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2658 set_use_iv_cost (struct ivopts_data
*data
,
2659 struct iv_use
*use
, struct iv_cand
*cand
,
2660 comp_cost cost
, bitmap depends_on
, tree value
,
2661 enum tree_code comp
, int inv_expr_id
)
2665 if (infinite_cost_p (cost
))
2667 BITMAP_FREE (depends_on
);
2671 if (data
->consider_all_candidates
)
2673 use
->cost_map
[cand
->id
].cand
= cand
;
2674 use
->cost_map
[cand
->id
].cost
= cost
;
2675 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2676 use
->cost_map
[cand
->id
].value
= value
;
2677 use
->cost_map
[cand
->id
].comp
= comp
;
2678 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2682 /* n_map_members is a power of two, so this computes modulo. */
2683 s
= cand
->id
& (use
->n_map_members
- 1);
2684 for (i
= s
; i
< use
->n_map_members
; i
++)
2685 if (!use
->cost_map
[i
].cand
)
2687 for (i
= 0; i
< s
; i
++)
2688 if (!use
->cost_map
[i
].cand
)
2694 use
->cost_map
[i
].cand
= cand
;
2695 use
->cost_map
[i
].cost
= cost
;
2696 use
->cost_map
[i
].depends_on
= depends_on
;
2697 use
->cost_map
[i
].value
= value
;
2698 use
->cost_map
[i
].comp
= comp
;
2699 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2702 /* Gets cost of (USE, CANDIDATE) pair. */
2704 static struct cost_pair
*
2705 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2706 struct iv_cand
*cand
)
2709 struct cost_pair
*ret
;
2714 if (data
->consider_all_candidates
)
2716 ret
= use
->cost_map
+ cand
->id
;
2723 /* n_map_members is a power of two, so this computes modulo. */
2724 s
= cand
->id
& (use
->n_map_members
- 1);
2725 for (i
= s
; i
< use
->n_map_members
; i
++)
2726 if (use
->cost_map
[i
].cand
== cand
)
2727 return use
->cost_map
+ i
;
2728 else if (use
->cost_map
[i
].cand
== NULL
)
2730 for (i
= 0; i
< s
; i
++)
2731 if (use
->cost_map
[i
].cand
== cand
)
2732 return use
->cost_map
+ i
;
2733 else if (use
->cost_map
[i
].cand
== NULL
)
2739 /* Returns estimate on cost of computing SEQ. */
2742 seq_cost (rtx seq
, bool speed
)
2747 for (; seq
; seq
= NEXT_INSN (seq
))
2749 set
= single_set (seq
);
2751 cost
+= set_src_cost (SET_SRC (set
), speed
);
2759 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2761 produce_memory_decl_rtl (tree obj
, int *regno
)
2763 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2764 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2768 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2770 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2771 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2772 SET_SYMBOL_REF_DECL (x
, obj
);
2773 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2774 set_mem_addr_space (x
, as
);
2775 targetm
.encode_section_info (obj
, x
, true);
2779 x
= gen_raw_REG (address_mode
, (*regno
)++);
2780 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2781 set_mem_addr_space (x
, as
);
2787 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2788 walk_tree. DATA contains the actual fake register number. */
2791 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2793 tree obj
= NULL_TREE
;
2795 int *regno
= (int *) data
;
2797 switch (TREE_CODE (*expr_p
))
2800 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2801 handled_component_p (*expr_p
);
2802 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2805 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2806 x
= produce_memory_decl_rtl (obj
, regno
);
2811 obj
= SSA_NAME_VAR (*expr_p
);
2812 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2815 if (!DECL_RTL_SET_P (obj
))
2816 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2825 if (DECL_RTL_SET_P (obj
))
2828 if (DECL_MODE (obj
) == BLKmode
)
2829 x
= produce_memory_decl_rtl (obj
, regno
);
2831 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2841 decl_rtl_to_reset
.safe_push (obj
);
2842 SET_DECL_RTL (obj
, x
);
2848 /* Determines cost of the computation of EXPR. */
2851 computation_cost (tree expr
, bool speed
)
2854 tree type
= TREE_TYPE (expr
);
2856 /* Avoid using hard regs in ways which may be unsupported. */
2857 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2858 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2859 enum node_frequency real_frequency
= node
->frequency
;
2861 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2862 crtl
->maybe_hot_insn_p
= speed
;
2863 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2865 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2868 default_rtl_profile ();
2869 node
->frequency
= real_frequency
;
2871 cost
= seq_cost (seq
, speed
);
2873 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2874 TYPE_ADDR_SPACE (type
), speed
);
2875 else if (!REG_P (rslt
))
2876 cost
+= set_src_cost (rslt
, speed
);
2881 /* Returns variable containing the value of candidate CAND at statement AT. */
2884 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2886 if (stmt_after_increment (loop
, cand
, stmt
))
2887 return cand
->var_after
;
2889 return cand
->var_before
;
2892 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2893 same precision that is at least as wide as the precision of TYPE, stores
2894 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2898 determine_common_wider_type (tree
*a
, tree
*b
)
2900 tree wider_type
= NULL
;
2902 tree atype
= TREE_TYPE (*a
);
2904 if (CONVERT_EXPR_P (*a
))
2906 suba
= TREE_OPERAND (*a
, 0);
2907 wider_type
= TREE_TYPE (suba
);
2908 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2914 if (CONVERT_EXPR_P (*b
))
2916 subb
= TREE_OPERAND (*b
, 0);
2917 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2928 /* Determines the expression by that USE is expressed from induction variable
2929 CAND at statement AT in LOOP. The expression is stored in a decomposed
2930 form into AFF. Returns false if USE cannot be expressed using CAND. */
2933 get_computation_aff (struct loop
*loop
,
2934 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2935 struct affine_tree_combination
*aff
)
2937 tree ubase
= use
->iv
->base
;
2938 tree ustep
= use
->iv
->step
;
2939 tree cbase
= cand
->iv
->base
;
2940 tree cstep
= cand
->iv
->step
, cstep_common
;
2941 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2942 tree common_type
, var
;
2944 aff_tree cbase_aff
, var_aff
;
2947 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2949 /* We do not have a precision to express the values of use. */
2953 var
= var_at_stmt (loop
, cand
, at
);
2954 uutype
= unsigned_type_for (utype
);
2956 /* If the conversion is not noop, perform it. */
2957 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2959 cstep
= fold_convert (uutype
, cstep
);
2960 cbase
= fold_convert (uutype
, cbase
);
2961 var
= fold_convert (uutype
, var
);
2964 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2967 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2968 type, we achieve better folding by computing their difference in this
2969 wider type, and cast the result to UUTYPE. We do not need to worry about
2970 overflows, as all the arithmetics will in the end be performed in UUTYPE
2972 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2974 /* use = ubase - ratio * cbase + ratio * var. */
2975 tree_to_aff_combination (ubase
, common_type
, aff
);
2976 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2977 tree_to_aff_combination (var
, uutype
, &var_aff
);
2979 /* We need to shift the value if we are after the increment. */
2980 if (stmt_after_increment (loop
, cand
, at
))
2984 if (common_type
!= uutype
)
2985 cstep_common
= fold_convert (common_type
, cstep
);
2987 cstep_common
= cstep
;
2989 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2990 aff_combination_add (&cbase_aff
, &cstep_aff
);
2993 aff_combination_scale (&cbase_aff
, -rat
);
2994 aff_combination_add (aff
, &cbase_aff
);
2995 if (common_type
!= uutype
)
2996 aff_combination_convert (aff
, uutype
);
2998 aff_combination_scale (&var_aff
, rat
);
2999 aff_combination_add (aff
, &var_aff
);
3004 /* Return the type of USE. */
3007 get_use_type (struct iv_use
*use
)
3009 tree base_type
= TREE_TYPE (use
->iv
->base
);
3012 if (use
->type
== USE_ADDRESS
)
3014 /* The base_type may be a void pointer. Create a pointer type based on
3015 the mem_ref instead. */
3016 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3017 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3018 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3026 /* Determines the expression by that USE is expressed from induction variable
3027 CAND at statement AT in LOOP. The computation is unshared. */
3030 get_computation_at (struct loop
*loop
,
3031 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3034 tree type
= get_use_type (use
);
3036 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3038 unshare_aff_combination (&aff
);
3039 return fold_convert (type
, aff_combination_to_tree (&aff
));
3042 /* Determines the expression by that USE is expressed from induction variable
3043 CAND in LOOP. The computation is unshared. */
3046 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3048 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3051 /* Adjust the cost COST for being in loop setup rather than loop body.
3052 If we're optimizing for space, the loop setup overhead is constant;
3053 if we're optimizing for speed, amortize it over the per-iteration cost. */
3055 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3059 else if (optimize_loop_for_speed_p (data
->current_loop
))
3060 return cost
/ avg_loop_niter (data
->current_loop
);
3065 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3066 validity for a memory reference accessing memory of mode MODE in
3067 address space AS. */
3071 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3074 #define MAX_RATIO 128
3075 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3076 static vec
<sbitmap
> valid_mult_list
;
3079 if (data_index
>= valid_mult_list
.length ())
3080 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3082 valid_mult
= valid_mult_list
[data_index
];
3085 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3086 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3090 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3091 bitmap_clear (valid_mult
);
3092 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3093 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3095 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3096 if (memory_address_addr_space_p (mode
, addr
, as
))
3097 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3100 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3102 fprintf (dump_file
, " allowed multipliers:");
3103 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3104 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3105 fprintf (dump_file
, " %d", (int) i
);
3106 fprintf (dump_file
, "\n");
3107 fprintf (dump_file
, "\n");
3110 valid_mult_list
[data_index
] = valid_mult
;
3113 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3116 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3119 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3120 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3121 variable is omitted. Compute the cost for a memory reference that accesses
3122 a memory location of mode MEM_MODE in address space AS.
3124 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3125 size of MEM_MODE / RATIO) is available. To make this determination, we
3126 look at the size of the increment to be made, which is given in CSTEP.
3127 CSTEP may be zero if the step is unknown.
3128 STMT_AFTER_INC is true iff the statement we're looking at is after the
3129 increment of the original biv.
3131 TODO -- there must be some better way. This all is quite crude. */
3133 typedef struct address_cost_data_s
3135 HOST_WIDE_INT min_offset
, max_offset
;
3136 unsigned costs
[2][2][2][2];
3137 } *address_cost_data
;
3141 get_address_cost (bool symbol_present
, bool var_present
,
3142 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3143 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3144 addr_space_t as
, bool speed
,
3145 bool stmt_after_inc
, bool *may_autoinc
)
3147 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3148 static vec
<address_cost_data
> address_cost_data_list
;
3149 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3150 address_cost_data data
;
3151 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3152 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3153 unsigned cost
, acost
, complexity
;
3154 bool offset_p
, ratio_p
, autoinc
;
3155 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3156 unsigned HOST_WIDE_INT mask
;
3159 if (data_index
>= address_cost_data_list
.length ())
3160 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3162 data
= address_cost_data_list
[data_index
];
3166 HOST_WIDE_INT rat
, off
= 0;
3167 int old_cse_not_expected
, width
;
3168 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3169 rtx seq
, addr
, base
;
3172 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3174 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3176 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3177 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3178 width
= HOST_BITS_PER_WIDE_INT
- 1;
3179 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3181 for (i
= width
; i
>= 0; i
--)
3183 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3184 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3185 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3188 data
->min_offset
= (i
== -1? 0 : off
);
3190 for (i
= width
; i
>= 0; i
--)
3192 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3193 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3194 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3199 data
->max_offset
= off
;
3201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3203 fprintf (dump_file
, "get_address_cost:\n");
3204 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3205 GET_MODE_NAME (mem_mode
),
3207 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3208 GET_MODE_NAME (mem_mode
),
3213 for (i
= 2; i
<= MAX_RATIO
; i
++)
3214 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3220 /* Compute the cost of various addressing modes. */
3222 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3223 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3225 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3226 || USE_STORE_PRE_DECREMENT (mem_mode
))
3228 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3229 has_predec
[mem_mode
]
3230 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3232 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3233 || USE_STORE_POST_DECREMENT (mem_mode
))
3235 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3236 has_postdec
[mem_mode
]
3237 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3239 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3240 || USE_STORE_PRE_DECREMENT (mem_mode
))
3242 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3243 has_preinc
[mem_mode
]
3244 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3246 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3247 || USE_STORE_POST_INCREMENT (mem_mode
))
3249 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3250 has_postinc
[mem_mode
]
3251 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3253 for (i
= 0; i
< 16; i
++)
3256 var_p
= (i
>> 1) & 1;
3257 off_p
= (i
>> 2) & 1;
3258 rat_p
= (i
>> 3) & 1;
3262 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3263 gen_int_mode (rat
, address_mode
));
3266 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3270 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3271 /* ??? We can run into trouble with some backends by presenting
3272 it with symbols which haven't been properly passed through
3273 targetm.encode_section_info. By setting the local bit, we
3274 enhance the probability of things working. */
3275 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3278 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3280 (PLUS
, address_mode
, base
,
3281 gen_int_mode (off
, address_mode
)));
3284 base
= gen_int_mode (off
, address_mode
);
3289 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3292 /* To avoid splitting addressing modes, pretend that no cse will
3294 old_cse_not_expected
= cse_not_expected
;
3295 cse_not_expected
= true;
3296 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3297 cse_not_expected
= old_cse_not_expected
;
3301 acost
= seq_cost (seq
, speed
);
3302 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3306 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3309 /* On some targets, it is quite expensive to load symbol to a register,
3310 which makes addresses that contain symbols look much more expensive.
3311 However, the symbol will have to be loaded in any case before the
3312 loop (and quite likely we have it in register already), so it does not
3313 make much sense to penalize them too heavily. So make some final
3314 tweaks for the SYMBOL_PRESENT modes:
3316 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3317 var is cheaper, use this mode with small penalty.
3318 If VAR_PRESENT is true, try whether the mode with
3319 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3320 if this is the case, use it. */
3321 add_c
= add_cost (speed
, address_mode
);
3322 for (i
= 0; i
< 8; i
++)
3325 off_p
= (i
>> 1) & 1;
3326 rat_p
= (i
>> 2) & 1;
3328 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3332 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3333 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3336 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3338 fprintf (dump_file
, "Address costs:\n");
3340 for (i
= 0; i
< 16; i
++)
3343 var_p
= (i
>> 1) & 1;
3344 off_p
= (i
>> 2) & 1;
3345 rat_p
= (i
>> 3) & 1;
3347 fprintf (dump_file
, " ");
3349 fprintf (dump_file
, "sym + ");
3351 fprintf (dump_file
, "var + ");
3353 fprintf (dump_file
, "cst + ");
3355 fprintf (dump_file
, "rat * ");
3357 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3358 fprintf (dump_file
, "index costs %d\n", acost
);
3360 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3361 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3362 fprintf (dump_file
, " May include autoinc/dec\n");
3363 fprintf (dump_file
, "\n");
3366 address_cost_data_list
[data_index
] = data
;
3369 bits
= GET_MODE_BITSIZE (address_mode
);
3370 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3372 if ((offset
>> (bits
- 1) & 1))
3377 msize
= GET_MODE_SIZE (mem_mode
);
3378 autoinc_offset
= offset
;
3380 autoinc_offset
+= ratio
* cstep
;
3381 if (symbol_present
|| var_present
|| ratio
!= 1)
3383 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3385 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3387 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3389 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3390 && msize
== -cstep
))
3394 offset_p
= (s_offset
!= 0
3395 && data
->min_offset
<= s_offset
3396 && s_offset
<= data
->max_offset
);
3397 ratio_p
= (ratio
!= 1
3398 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3400 if (ratio
!= 1 && !ratio_p
)
3401 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3403 if (s_offset
&& !offset_p
&& !symbol_present
)
3404 cost
+= add_cost (speed
, address_mode
);
3407 *may_autoinc
= autoinc
;
3408 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3409 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3410 return new_cost (cost
+ acost
, complexity
);
3413 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3414 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3415 calculating the operands of EXPR. Returns true if successful, and returns
3416 the cost in COST. */
3419 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3420 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3423 tree op1
= TREE_OPERAND (expr
, 1);
3424 tree cst
= TREE_OPERAND (mult
, 1);
3425 tree multop
= TREE_OPERAND (mult
, 0);
3426 int m
= exact_log2 (int_cst_value (cst
));
3427 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3430 if (!(m
>= 0 && m
< maxm
))
3433 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3434 ? shiftadd_cost (speed
, mode
, m
)
3436 ? shiftsub1_cost (speed
, mode
, m
)
3437 : shiftsub0_cost (speed
, mode
, m
)));
3438 res
= new_cost (sa_cost
, 0);
3439 res
= add_costs (res
, mult
== op1
? cost0
: cost1
);
3441 STRIP_NOPS (multop
);
3442 if (!is_gimple_val (multop
))
3443 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3449 /* Estimates cost of forcing expression EXPR into a variable. */
3452 force_expr_to_var_cost (tree expr
, bool speed
)
3454 static bool costs_initialized
= false;
3455 static unsigned integer_cost
[2];
3456 static unsigned symbol_cost
[2];
3457 static unsigned address_cost
[2];
3459 comp_cost cost0
, cost1
, cost
;
3460 enum machine_mode mode
;
3462 if (!costs_initialized
)
3464 tree type
= build_pointer_type (integer_type_node
);
3469 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3470 TREE_STATIC (var
) = 1;
3471 x
= produce_memory_decl_rtl (var
, NULL
);
3472 SET_DECL_RTL (var
, x
);
3474 addr
= build1 (ADDR_EXPR
, type
, var
);
3477 for (i
= 0; i
< 2; i
++)
3479 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3482 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3485 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3486 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3488 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3489 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3490 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3491 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3492 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3493 fprintf (dump_file
, "\n");
3497 costs_initialized
= true;
3502 if (SSA_VAR_P (expr
))
3505 if (is_gimple_min_invariant (expr
))
3507 if (TREE_CODE (expr
) == INTEGER_CST
)
3508 return new_cost (integer_cost
[speed
], 0);
3510 if (TREE_CODE (expr
) == ADDR_EXPR
)
3512 tree obj
= TREE_OPERAND (expr
, 0);
3514 if (TREE_CODE (obj
) == VAR_DECL
3515 || TREE_CODE (obj
) == PARM_DECL
3516 || TREE_CODE (obj
) == RESULT_DECL
)
3517 return new_cost (symbol_cost
[speed
], 0);
3520 return new_cost (address_cost
[speed
], 0);
3523 switch (TREE_CODE (expr
))
3525 case POINTER_PLUS_EXPR
:
3529 op0
= TREE_OPERAND (expr
, 0);
3530 op1
= TREE_OPERAND (expr
, 1);
3534 if (is_gimple_val (op0
))
3537 cost0
= force_expr_to_var_cost (op0
, speed
);
3539 if (is_gimple_val (op1
))
3542 cost1
= force_expr_to_var_cost (op1
, speed
);
3547 op0
= TREE_OPERAND (expr
, 0);
3551 if (is_gimple_val (op0
))
3554 cost0
= force_expr_to_var_cost (op0
, speed
);
3560 /* Just an arbitrary value, FIXME. */
3561 return new_cost (target_spill_cost
[speed
], 0);
3564 mode
= TYPE_MODE (TREE_TYPE (expr
));
3565 switch (TREE_CODE (expr
))
3567 case POINTER_PLUS_EXPR
:
3571 cost
= new_cost (add_cost (speed
, mode
), 0);
3572 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3574 tree mult
= NULL_TREE
;
3576 if (TREE_CODE (op1
) == MULT_EXPR
)
3578 else if (TREE_CODE (op0
) == MULT_EXPR
)
3581 if (mult
!= NULL_TREE
3582 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3583 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3590 if (cst_and_fits_in_hwi (op0
))
3591 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3593 else if (cst_and_fits_in_hwi (op1
))
3594 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3597 return new_cost (target_spill_cost
[speed
], 0);
3604 cost
= add_costs (cost
, cost0
);
3605 cost
= add_costs (cost
, cost1
);
3607 /* Bound the cost by target_spill_cost. The parts of complicated
3608 computations often are either loop invariant or at least can
3609 be shared between several iv uses, so letting this grow without
3610 limits would not give reasonable results. */
3611 if (cost
.cost
> (int) target_spill_cost
[speed
])
3612 cost
.cost
= target_spill_cost
[speed
];
3617 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3618 invariants the computation depends on. */
3621 force_var_cost (struct ivopts_data
*data
,
3622 tree expr
, bitmap
*depends_on
)
3626 fd_ivopts_data
= data
;
3627 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3630 return force_expr_to_var_cost (expr
, data
->speed
);
3633 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3634 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3635 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3636 invariants the computation depends on. */
3639 split_address_cost (struct ivopts_data
*data
,
3640 tree addr
, bool *symbol_present
, bool *var_present
,
3641 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3644 HOST_WIDE_INT bitsize
;
3645 HOST_WIDE_INT bitpos
;
3647 enum machine_mode mode
;
3648 int unsignedp
, volatilep
;
3650 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3651 &unsignedp
, &volatilep
, false);
3654 || bitpos
% BITS_PER_UNIT
!= 0
3655 || TREE_CODE (core
) != VAR_DECL
)
3657 *symbol_present
= false;
3658 *var_present
= true;
3659 fd_ivopts_data
= data
;
3660 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3661 return new_cost (target_spill_cost
[data
->speed
], 0);
3664 *offset
+= bitpos
/ BITS_PER_UNIT
;
3665 if (TREE_STATIC (core
)
3666 || DECL_EXTERNAL (core
))
3668 *symbol_present
= true;
3669 *var_present
= false;
3673 *symbol_present
= false;
3674 *var_present
= true;
3678 /* Estimates cost of expressing difference of addresses E1 - E2 as
3679 var + symbol + offset. The value of offset is added to OFFSET,
3680 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3681 part is missing. DEPENDS_ON is a set of the invariants the computation
3685 ptr_difference_cost (struct ivopts_data
*data
,
3686 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3687 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3689 HOST_WIDE_INT diff
= 0;
3690 aff_tree aff_e1
, aff_e2
;
3693 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3695 if (ptr_difference_const (e1
, e2
, &diff
))
3698 *symbol_present
= false;
3699 *var_present
= false;
3703 if (integer_zerop (e2
))
3704 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3705 symbol_present
, var_present
, offset
, depends_on
);
3707 *symbol_present
= false;
3708 *var_present
= true;
3710 type
= signed_type_for (TREE_TYPE (e1
));
3711 tree_to_aff_combination (e1
, type
, &aff_e1
);
3712 tree_to_aff_combination (e2
, type
, &aff_e2
);
3713 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3714 aff_combination_add (&aff_e1
, &aff_e2
);
3716 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3719 /* Estimates cost of expressing difference E1 - E2 as
3720 var + symbol + offset. The value of offset is added to OFFSET,
3721 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3722 part is missing. DEPENDS_ON is a set of the invariants the computation
3726 difference_cost (struct ivopts_data
*data
,
3727 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3728 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3730 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3731 unsigned HOST_WIDE_INT off1
, off2
;
3732 aff_tree aff_e1
, aff_e2
;
3735 e1
= strip_offset (e1
, &off1
);
3736 e2
= strip_offset (e2
, &off2
);
3737 *offset
+= off1
- off2
;
3742 if (TREE_CODE (e1
) == ADDR_EXPR
)
3743 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3744 offset
, depends_on
);
3745 *symbol_present
= false;
3747 if (operand_equal_p (e1
, e2
, 0))
3749 *var_present
= false;
3753 *var_present
= true;
3755 if (integer_zerop (e2
))
3756 return force_var_cost (data
, e1
, depends_on
);
3758 if (integer_zerop (e1
))
3760 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3761 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3765 type
= signed_type_for (TREE_TYPE (e1
));
3766 tree_to_aff_combination (e1
, type
, &aff_e1
);
3767 tree_to_aff_combination (e2
, type
, &aff_e2
);
3768 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3769 aff_combination_add (&aff_e1
, &aff_e2
);
3771 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3774 /* Returns true if AFF1 and AFF2 are identical. */
3777 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3781 if (aff1
->n
!= aff2
->n
)
3784 for (i
= 0; i
< aff1
->n
; i
++)
3786 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3789 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3795 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3798 get_expr_id (struct ivopts_data
*data
, tree expr
)
3800 struct iv_inv_expr_ent ent
;
3801 struct iv_inv_expr_ent
**slot
;
3804 ent
.hash
= iterative_hash_expr (expr
, 0);
3805 slot
= (struct iv_inv_expr_ent
**) htab_find_slot (data
->inv_expr_tab
,
3810 *slot
= XNEW (struct iv_inv_expr_ent
);
3811 (*slot
)->expr
= expr
;
3812 (*slot
)->hash
= ent
.hash
;
3813 (*slot
)->id
= data
->inv_expr_id
++;
3817 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3818 requires a new compiler generated temporary. Returns -1 otherwise.
3819 ADDRESS_P is a flag indicating if the expression is for address
3823 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3824 tree cbase
, HOST_WIDE_INT ratio
,
3827 aff_tree ubase_aff
, cbase_aff
;
3835 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3836 && (TREE_CODE (cbase
) == INTEGER_CST
))
3839 /* Strips the constant part. */
3840 if (TREE_CODE (ubase
) == PLUS_EXPR
3841 || TREE_CODE (ubase
) == MINUS_EXPR
3842 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3844 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3845 ubase
= TREE_OPERAND (ubase
, 0);
3848 /* Strips the constant part. */
3849 if (TREE_CODE (cbase
) == PLUS_EXPR
3850 || TREE_CODE (cbase
) == MINUS_EXPR
3851 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3853 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3854 cbase
= TREE_OPERAND (cbase
, 0);
3859 if (((TREE_CODE (ubase
) == SSA_NAME
)
3860 || (TREE_CODE (ubase
) == ADDR_EXPR
3861 && is_gimple_min_invariant (ubase
)))
3862 && (TREE_CODE (cbase
) == INTEGER_CST
))
3865 if (((TREE_CODE (cbase
) == SSA_NAME
)
3866 || (TREE_CODE (cbase
) == ADDR_EXPR
3867 && is_gimple_min_invariant (cbase
)))
3868 && (TREE_CODE (ubase
) == INTEGER_CST
))
3874 if(operand_equal_p (ubase
, cbase
, 0))
3877 if (TREE_CODE (ubase
) == ADDR_EXPR
3878 && TREE_CODE (cbase
) == ADDR_EXPR
)
3882 usym
= TREE_OPERAND (ubase
, 0);
3883 csym
= TREE_OPERAND (cbase
, 0);
3884 if (TREE_CODE (usym
) == ARRAY_REF
)
3886 tree ind
= TREE_OPERAND (usym
, 1);
3887 if (TREE_CODE (ind
) == INTEGER_CST
3888 && host_integerp (ind
, 0)
3889 && TREE_INT_CST_LOW (ind
) == 0)
3890 usym
= TREE_OPERAND (usym
, 0);
3892 if (TREE_CODE (csym
) == ARRAY_REF
)
3894 tree ind
= TREE_OPERAND (csym
, 1);
3895 if (TREE_CODE (ind
) == INTEGER_CST
3896 && host_integerp (ind
, 0)
3897 && TREE_INT_CST_LOW (ind
) == 0)
3898 csym
= TREE_OPERAND (csym
, 0);
3900 if (operand_equal_p (usym
, csym
, 0))
3903 /* Now do more complex comparison */
3904 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3905 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3906 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3910 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3911 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3913 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
3914 aff_combination_add (&ubase_aff
, &cbase_aff
);
3915 expr
= aff_combination_to_tree (&ubase_aff
);
3916 return get_expr_id (data
, expr
);
3921 /* Determines the cost of the computation by that USE is expressed
3922 from induction variable CAND. If ADDRESS_P is true, we just need
3923 to create an address from it, otherwise we want to get it into
3924 register. A set of invariants we depend on is stored in
3925 DEPENDS_ON. AT is the statement at that the value is computed.
3926 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3927 addressing is likely. */
3930 get_computation_cost_at (struct ivopts_data
*data
,
3931 struct iv_use
*use
, struct iv_cand
*cand
,
3932 bool address_p
, bitmap
*depends_on
, gimple at
,
3936 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3938 tree utype
= TREE_TYPE (ubase
), ctype
;
3939 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3940 HOST_WIDE_INT ratio
, aratio
;
3941 bool var_present
, symbol_present
, stmt_is_after_inc
;
3944 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3945 enum machine_mode mem_mode
= (address_p
3946 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
3951 /* Only consider real candidates. */
3953 return infinite_cost
;
3955 cbase
= cand
->iv
->base
;
3956 cstep
= cand
->iv
->step
;
3957 ctype
= TREE_TYPE (cbase
);
3959 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3961 /* We do not have a precision to express the values of use. */
3962 return infinite_cost
;
3966 || (use
->iv
->base_object
3967 && cand
->iv
->base_object
3968 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
3969 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
3971 /* Do not try to express address of an object with computation based
3972 on address of a different object. This may cause problems in rtl
3973 level alias analysis (that does not expect this to be happening,
3974 as this is illegal in C), and would be unlikely to be useful
3976 if (use
->iv
->base_object
3977 && cand
->iv
->base_object
3978 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3979 return infinite_cost
;
3982 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3984 /* TODO -- add direct handling of this case. */
3988 /* CSTEPI is removed from the offset in case statement is after the
3989 increment. If the step is not constant, we use zero instead.
3990 This is a bit imprecise (there is the extra addition), but
3991 redundancy elimination is likely to transform the code so that
3992 it uses value of the variable before increment anyway,
3993 so it is not that much unrealistic. */
3994 if (cst_and_fits_in_hwi (cstep
))
3995 cstepi
= int_cst_value (cstep
);
3999 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4000 return infinite_cost
;
4002 if (rat
.fits_shwi ())
4003 ratio
= rat
.to_shwi ();
4005 return infinite_cost
;
4008 ctype
= TREE_TYPE (cbase
);
4010 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4013 or ratio == 1, it is better to handle this like
4015 ubase - ratio * cbase + ratio * var
4017 (also holds in the case ratio == -1, TODO. */
4019 if (cst_and_fits_in_hwi (cbase
))
4021 offset
= - ratio
* int_cst_value (cbase
);
4022 cost
= difference_cost (data
,
4023 ubase
, build_int_cst (utype
, 0),
4024 &symbol_present
, &var_present
, &offset
,
4026 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4028 else if (ratio
== 1)
4030 tree real_cbase
= cbase
;
4032 /* Check to see if any adjustment is needed. */
4033 if (cstepi
== 0 && stmt_is_after_inc
)
4035 aff_tree real_cbase_aff
;
4038 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4040 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4042 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4043 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4046 cost
= difference_cost (data
,
4048 &symbol_present
, &var_present
, &offset
,
4050 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4053 && !POINTER_TYPE_P (ctype
)
4054 && multiplier_allowed_in_address_p
4056 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4059 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4060 cost
= difference_cost (data
,
4062 &symbol_present
, &var_present
, &offset
,
4064 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4068 cost
= force_var_cost (data
, cbase
, depends_on
);
4069 cost
= add_costs (cost
,
4070 difference_cost (data
,
4071 ubase
, build_int_cst (utype
, 0),
4072 &symbol_present
, &var_present
,
4073 &offset
, depends_on
));
4074 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4075 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4081 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4082 /* Clear depends on. */
4083 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4084 bitmap_clear (*depends_on
);
4087 /* If we are after the increment, the value of the candidate is higher by
4089 if (stmt_is_after_inc
)
4090 offset
-= ratio
* cstepi
;
4092 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4093 (symbol/var1/const parts may be omitted). If we are looking for an
4094 address, find the cost of addressing this. */
4096 return add_costs (cost
,
4097 get_address_cost (symbol_present
, var_present
,
4098 offset
, ratio
, cstepi
,
4100 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4101 speed
, stmt_is_after_inc
,
4104 /* Otherwise estimate the costs for computing the expression. */
4105 if (!symbol_present
&& !var_present
&& !offset
)
4108 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4112 /* Symbol + offset should be compile-time computable so consider that they
4113 are added once to the variable, if present. */
4114 if (var_present
&& (symbol_present
|| offset
))
4115 cost
.cost
+= adjust_setup_cost (data
,
4116 add_cost (speed
, TYPE_MODE (ctype
)));
4118 /* Having offset does not affect runtime cost in case it is added to
4119 symbol, but it increases complexity. */
4123 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4125 aratio
= ratio
> 0 ? ratio
: -ratio
;
4127 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4132 *can_autoinc
= false;
4135 /* Just get the expression, expand it and measure the cost. */
4136 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4139 return infinite_cost
;
4142 comp
= build_simple_mem_ref (comp
);
4144 return new_cost (computation_cost (comp
, speed
), 0);
4148 /* Determines the cost of the computation by that USE is expressed
4149 from induction variable CAND. If ADDRESS_P is true, we just need
4150 to create an address from it, otherwise we want to get it into
4151 register. A set of invariants we depend on is stored in
4152 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4153 autoinc addressing is likely. */
4156 get_computation_cost (struct ivopts_data
*data
,
4157 struct iv_use
*use
, struct iv_cand
*cand
,
4158 bool address_p
, bitmap
*depends_on
,
4159 bool *can_autoinc
, int *inv_expr_id
)
4161 return get_computation_cost_at (data
,
4162 use
, cand
, address_p
, depends_on
, use
->stmt
,
4163 can_autoinc
, inv_expr_id
);
4166 /* Determines cost of basing replacement of USE on CAND in a generic
4170 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4171 struct iv_use
*use
, struct iv_cand
*cand
)
4175 int inv_expr_id
= -1;
4177 /* The simple case first -- if we need to express value of the preserved
4178 original biv, the cost is 0. This also prevents us from counting the
4179 cost of increment twice -- once at this use and once in the cost of
4181 if (cand
->pos
== IP_ORIGINAL
4182 && cand
->incremented_at
== use
->stmt
)
4184 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4189 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4190 NULL
, &inv_expr_id
);
4192 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4195 return !infinite_cost_p (cost
);
4198 /* Determines cost of basing replacement of USE on CAND in an address. */
4201 determine_use_iv_cost_address (struct ivopts_data
*data
,
4202 struct iv_use
*use
, struct iv_cand
*cand
)
4206 int inv_expr_id
= -1;
4207 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4208 &can_autoinc
, &inv_expr_id
);
4210 if (cand
->ainc_use
== use
)
4213 cost
.cost
-= cand
->cost_step
;
4214 /* If we generated the candidate solely for exploiting autoincrement
4215 opportunities, and it turns out it can't be used, set the cost to
4216 infinity to make sure we ignore it. */
4217 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4218 cost
= infinite_cost
;
4220 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4223 return !infinite_cost_p (cost
);
4226 /* Computes value of candidate CAND at position AT in iteration NITER, and
4227 stores it to VAL. */
4230 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4233 aff_tree step
, delta
, nit
;
4234 struct iv
*iv
= cand
->iv
;
4235 tree type
= TREE_TYPE (iv
->base
);
4236 tree steptype
= type
;
4237 if (POINTER_TYPE_P (type
))
4238 steptype
= sizetype
;
4240 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4241 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4242 aff_combination_convert (&nit
, steptype
);
4243 aff_combination_mult (&nit
, &step
, &delta
);
4244 if (stmt_after_increment (loop
, cand
, at
))
4245 aff_combination_add (&delta
, &step
);
4247 tree_to_aff_combination (iv
->base
, type
, val
);
4248 aff_combination_add (val
, &delta
);
4251 /* Returns period of induction variable iv. */
4254 iv_period (struct iv
*iv
)
4256 tree step
= iv
->step
, period
, type
;
4259 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4261 type
= unsigned_type_for (TREE_TYPE (step
));
4262 /* Period of the iv is lcm (step, type_range)/step -1,
4263 i.e., N*type_range/step - 1. Since type range is power
4264 of two, N == (step >> num_of_ending_zeros_binary (step),
4265 so the final result is
4267 (type_range >> num_of_ending_zeros_binary (step)) - 1
4270 pow2div
= num_ending_zeros (step
);
4272 period
= build_low_bits_mask (type
,
4273 (TYPE_PRECISION (type
)
4274 - tree_low_cst (pow2div
, 1)));
4279 /* Returns the comparison operator used when eliminating the iv USE. */
4281 static enum tree_code
4282 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4284 struct loop
*loop
= data
->current_loop
;
4288 ex_bb
= gimple_bb (use
->stmt
);
4289 exit
= EDGE_SUCC (ex_bb
, 0);
4290 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4291 exit
= EDGE_SUCC (ex_bb
, 1);
4293 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4297 strip_wrap_conserving_type_conversions (tree exp
)
4299 while (tree_ssa_useless_type_conversion (exp
)
4300 && (nowrap_type_p (TREE_TYPE (exp
))
4301 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4302 exp
= TREE_OPERAND (exp
, 0);
4306 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4307 check for an exact match. */
4310 expr_equal_p (tree e
, tree what
)
4313 enum tree_code code
;
4315 e
= strip_wrap_conserving_type_conversions (e
);
4316 what
= strip_wrap_conserving_type_conversions (what
);
4318 code
= TREE_CODE (what
);
4319 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4322 if (operand_equal_p (e
, what
, 0))
4325 if (TREE_CODE (e
) != SSA_NAME
)
4328 stmt
= SSA_NAME_DEF_STMT (e
);
4329 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4330 || gimple_assign_rhs_code (stmt
) != code
)
4333 switch (get_gimple_rhs_class (code
))
4335 case GIMPLE_BINARY_RHS
:
4336 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4340 case GIMPLE_UNARY_RHS
:
4341 case GIMPLE_SINGLE_RHS
:
4342 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4348 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4349 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4350 calculation is performed in non-wrapping type.
4352 TODO: More generally, we could test for the situation that
4353 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4354 This would require knowing the sign of OFFSET.
4356 Also, we only look for the first addition in the computation of BASE.
4357 More complex analysis would be better, but introducing it just for
4358 this optimization seems like an overkill. */
4361 difference_cannot_overflow_p (tree base
, tree offset
)
4363 enum tree_code code
;
4366 if (!nowrap_type_p (TREE_TYPE (base
)))
4369 base
= expand_simple_operations (base
);
4371 if (TREE_CODE (base
) == SSA_NAME
)
4373 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4375 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4378 code
= gimple_assign_rhs_code (stmt
);
4379 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4382 e1
= gimple_assign_rhs1 (stmt
);
4383 e2
= gimple_assign_rhs2 (stmt
);
4387 code
= TREE_CODE (base
);
4388 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4390 e1
= TREE_OPERAND (base
, 0);
4391 e2
= TREE_OPERAND (base
, 1);
4394 /* TODO: deeper inspection may be necessary to prove the equality. */
4398 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4399 case POINTER_PLUS_EXPR
:
4400 return expr_equal_p (e2
, offset
);
4407 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4408 comparison with CAND. NITER describes the number of iterations of
4409 the loops. If successful, the comparison in COMP_P is altered accordingly.
4411 We aim to handle the following situation:
4427 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4428 We aim to optimize this to
4436 while (p < p_0 - a + b);
4438 This preserves the correctness, since the pointer arithmetics does not
4439 overflow. More precisely:
4441 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4442 overflow in computing it or the values of p.
4443 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4444 overflow. To prove this, we use the fact that p_0 = base + a. */
4447 iv_elimination_compare_lt (struct ivopts_data
*data
,
4448 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4449 struct tree_niter_desc
*niter
)
4451 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4452 struct affine_tree_combination nit
, tmpa
, tmpb
;
4453 enum tree_code comp
;
4456 /* We need to know that the candidate induction variable does not overflow.
4457 While more complex analysis may be used to prove this, for now just
4458 check that the variable appears in the original program and that it
4459 is computed in a type that guarantees no overflows. */
4460 cand_type
= TREE_TYPE (cand
->iv
->base
);
4461 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4464 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4465 the calculation of the BOUND could overflow, making the comparison
4467 if (!data
->loop_single_exit_p
)
4470 /* We need to be able to decide whether candidate is increasing or decreasing
4471 in order to choose the right comparison operator. */
4472 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4474 step
= int_cst_value (cand
->iv
->step
);
4476 /* Check that the number of iterations matches the expected pattern:
4477 a + 1 > b ? 0 : b - a - 1. */
4478 mbz
= niter
->may_be_zero
;
4479 if (TREE_CODE (mbz
) == GT_EXPR
)
4481 /* Handle a + 1 > b. */
4482 tree op0
= TREE_OPERAND (mbz
, 0);
4483 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4485 a
= TREE_OPERAND (op0
, 0);
4486 b
= TREE_OPERAND (mbz
, 1);
4491 else if (TREE_CODE (mbz
) == LT_EXPR
)
4493 tree op1
= TREE_OPERAND (mbz
, 1);
4495 /* Handle b < a + 1. */
4496 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4498 a
= TREE_OPERAND (op1
, 0);
4499 b
= TREE_OPERAND (mbz
, 0);
4507 /* Expected number of iterations is B - A - 1. Check that it matches
4508 the actual number, i.e., that B - A - NITER = 1. */
4509 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4510 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4511 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4512 aff_combination_scale (&nit
, double_int_minus_one
);
4513 aff_combination_scale (&tmpa
, double_int_minus_one
);
4514 aff_combination_add (&tmpb
, &tmpa
);
4515 aff_combination_add (&tmpb
, &nit
);
4516 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4519 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4521 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4523 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4524 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4527 /* Determine the new comparison operator. */
4528 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4529 if (*comp_p
== NE_EXPR
)
4531 else if (*comp_p
== EQ_EXPR
)
4532 *comp_p
= invert_tree_comparison (comp
, false);
4539 /* Check whether it is possible to express the condition in USE by comparison
4540 of candidate CAND. If so, store the value compared with to BOUND, and the
4541 comparison operator to COMP. */
4544 may_eliminate_iv (struct ivopts_data
*data
,
4545 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4546 enum tree_code
*comp
)
4551 struct loop
*loop
= data
->current_loop
;
4553 struct tree_niter_desc
*desc
= NULL
;
4555 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4558 /* For now works only for exits that dominate the loop latch.
4559 TODO: extend to other conditions inside loop body. */
4560 ex_bb
= gimple_bb (use
->stmt
);
4561 if (use
->stmt
!= last_stmt (ex_bb
)
4562 || gimple_code (use
->stmt
) != GIMPLE_COND
4563 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4566 exit
= EDGE_SUCC (ex_bb
, 0);
4567 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4568 exit
= EDGE_SUCC (ex_bb
, 1);
4569 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4572 desc
= niter_for_exit (data
, exit
);
4576 /* Determine whether we can use the variable to test the exit condition.
4577 This is the case iff the period of the induction variable is greater
4578 than the number of iterations for which the exit condition is true. */
4579 period
= iv_period (cand
->iv
);
4581 /* If the number of iterations is constant, compare against it directly. */
4582 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4584 /* See cand_value_at. */
4585 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4587 if (!tree_int_cst_lt (desc
->niter
, period
))
4592 if (tree_int_cst_lt (period
, desc
->niter
))
4597 /* If not, and if this is the only possible exit of the loop, see whether
4598 we can get a conservative estimate on the number of iterations of the
4599 entire loop and compare against that instead. */
4602 double_int period_value
, max_niter
;
4604 max_niter
= desc
->max
;
4605 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4606 max_niter
+= double_int_one
;
4607 period_value
= tree_to_double_int (period
);
4608 if (max_niter
.ugt (period_value
))
4610 /* See if we can take advantage of inferred loop bound information. */
4611 if (data
->loop_single_exit_p
)
4613 if (!max_loop_iterations (loop
, &max_niter
))
4615 /* The loop bound is already adjusted by adding 1. */
4616 if (max_niter
.ugt (period_value
))
4624 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4626 *bound
= aff_combination_to_tree (&bnd
);
4627 *comp
= iv_elimination_compare (data
, use
);
4629 /* It is unlikely that computing the number of iterations using division
4630 would be more profitable than keeping the original induction variable. */
4631 if (expression_expensive_p (*bound
))
4634 /* Sometimes, it is possible to handle the situation that the number of
4635 iterations may be zero unless additional assumtions by using <
4636 instead of != in the exit condition.
4638 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4639 base the exit condition on it. However, that is often too
4641 if (!integer_zerop (desc
->may_be_zero
))
4642 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4647 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4648 be copied, if is is used in the loop body and DATA->body_includes_call. */
4651 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4653 tree sbound
= bound
;
4654 STRIP_NOPS (sbound
);
4656 if (TREE_CODE (sbound
) == SSA_NAME
4657 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4658 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4659 && data
->body_includes_call
)
4660 return COSTS_N_INSNS (1);
4665 /* Determines cost of basing replacement of USE on CAND in a condition. */
4668 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4669 struct iv_use
*use
, struct iv_cand
*cand
)
4671 tree bound
= NULL_TREE
;
4673 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4674 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4676 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4677 tree
*control_var
, *bound_cst
;
4678 enum tree_code comp
= ERROR_MARK
;
4680 /* Only consider real candidates. */
4683 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4688 /* Try iv elimination. */
4689 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4691 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4692 if (elim_cost
.cost
== 0)
4693 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4694 else if (TREE_CODE (bound
) == INTEGER_CST
)
4696 /* If we replace a loop condition 'i < n' with 'p < base + n',
4697 depends_on_elim will have 'base' and 'n' set, which implies
4698 that both 'base' and 'n' will be live during the loop. More likely,
4699 'base + n' will be loop invariant, resulting in only one live value
4700 during the loop. So in that case we clear depends_on_elim and set
4701 elim_inv_expr_id instead. */
4702 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4704 elim_inv_expr_id
= get_expr_id (data
, bound
);
4705 bitmap_clear (depends_on_elim
);
4707 /* The bound is a loop invariant, so it will be only computed
4709 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4712 elim_cost
= infinite_cost
;
4714 /* Try expressing the original giv. If it is compared with an invariant,
4715 note that we cannot get rid of it. */
4716 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4720 /* When the condition is a comparison of the candidate IV against
4721 zero, prefer this IV.
4723 TODO: The constant that we're subtracting from the cost should
4724 be target-dependent. This information should be added to the
4725 target costs for each backend. */
4726 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4727 && integer_zerop (*bound_cst
)
4728 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4729 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4730 elim_cost
.cost
-= 1;
4732 express_cost
= get_computation_cost (data
, use
, cand
, false,
4733 &depends_on_express
, NULL
,
4734 &express_inv_expr_id
);
4735 fd_ivopts_data
= data
;
4736 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4738 /* Count the cost of the original bound as well. */
4739 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4740 if (bound_cost
.cost
== 0)
4741 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4742 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4743 bound_cost
.cost
= 0;
4744 express_cost
.cost
+= bound_cost
.cost
;
4746 /* Choose the better approach, preferring the eliminated IV. */
4747 if (compare_costs (elim_cost
, express_cost
) <= 0)
4750 depends_on
= depends_on_elim
;
4751 depends_on_elim
= NULL
;
4752 inv_expr_id
= elim_inv_expr_id
;
4756 cost
= express_cost
;
4757 depends_on
= depends_on_express
;
4758 depends_on_express
= NULL
;
4761 inv_expr_id
= express_inv_expr_id
;
4764 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4766 if (depends_on_elim
)
4767 BITMAP_FREE (depends_on_elim
);
4768 if (depends_on_express
)
4769 BITMAP_FREE (depends_on_express
);
4771 return !infinite_cost_p (cost
);
4774 /* Determines cost of basing replacement of USE on CAND. Returns false
4775 if USE cannot be based on CAND. */
4778 determine_use_iv_cost (struct ivopts_data
*data
,
4779 struct iv_use
*use
, struct iv_cand
*cand
)
4783 case USE_NONLINEAR_EXPR
:
4784 return determine_use_iv_cost_generic (data
, use
, cand
);
4787 return determine_use_iv_cost_address (data
, use
, cand
);
4790 return determine_use_iv_cost_condition (data
, use
, cand
);
4797 /* Return true if get_computation_cost indicates that autoincrement is
4798 a possibility for the pair of USE and CAND, false otherwise. */
4801 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4802 struct iv_cand
*cand
)
4808 if (use
->type
!= USE_ADDRESS
)
4811 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4812 &can_autoinc
, NULL
);
4814 BITMAP_FREE (depends_on
);
4816 return !infinite_cost_p (cost
) && can_autoinc
;
4819 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4820 use that allows autoincrement, and set their AINC_USE if possible. */
4823 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4827 for (i
= 0; i
< n_iv_cands (data
); i
++)
4829 struct iv_cand
*cand
= iv_cand (data
, i
);
4830 struct iv_use
*closest
= NULL
;
4831 if (cand
->pos
!= IP_ORIGINAL
)
4833 for (j
= 0; j
< n_iv_uses (data
); j
++)
4835 struct iv_use
*use
= iv_use (data
, j
);
4836 unsigned uid
= gimple_uid (use
->stmt
);
4837 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4838 || uid
> gimple_uid (cand
->incremented_at
))
4840 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4843 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4845 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4869 /* Determines costs of basing the use of the iv on an iv candidate. */
4872 determine_use_iv_costs (struct ivopts_data
*data
)
4876 struct iv_cand
*cand
;
4877 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4879 alloc_use_cost_map (data
);
4881 for (i
= 0; i
< n_iv_uses (data
); i
++)
4883 use
= iv_use (data
, i
);
4885 if (data
->consider_all_candidates
)
4887 for (j
= 0; j
< n_iv_cands (data
); j
++)
4889 cand
= iv_cand (data
, j
);
4890 determine_use_iv_cost (data
, use
, cand
);
4897 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4899 cand
= iv_cand (data
, j
);
4900 if (!determine_use_iv_cost (data
, use
, cand
))
4901 bitmap_set_bit (to_clear
, j
);
4904 /* Remove the candidates for that the cost is infinite from
4905 the list of related candidates. */
4906 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4907 bitmap_clear (to_clear
);
4911 BITMAP_FREE (to_clear
);
4913 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4915 fprintf (dump_file
, "Use-candidate costs:\n");
4917 for (i
= 0; i
< n_iv_uses (data
); i
++)
4919 use
= iv_use (data
, i
);
4921 fprintf (dump_file
, "Use %d:\n", i
);
4922 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4923 for (j
= 0; j
< use
->n_map_members
; j
++)
4925 if (!use
->cost_map
[j
].cand
4926 || infinite_cost_p (use
->cost_map
[j
].cost
))
4929 fprintf (dump_file
, " %d\t%d\t%d\t",
4930 use
->cost_map
[j
].cand
->id
,
4931 use
->cost_map
[j
].cost
.cost
,
4932 use
->cost_map
[j
].cost
.complexity
);
4933 if (use
->cost_map
[j
].depends_on
)
4934 bitmap_print (dump_file
,
4935 use
->cost_map
[j
].depends_on
, "","");
4936 if (use
->cost_map
[j
].inv_expr_id
!= -1)
4937 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
4938 fprintf (dump_file
, "\n");
4941 fprintf (dump_file
, "\n");
4943 fprintf (dump_file
, "\n");
4947 /* Determines cost of the candidate CAND. */
4950 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4952 comp_cost cost_base
;
4953 unsigned cost
, cost_step
;
4962 /* There are two costs associated with the candidate -- its increment
4963 and its initialization. The second is almost negligible for any loop
4964 that rolls enough, so we take it just very little into account. */
4966 base
= cand
->iv
->base
;
4967 cost_base
= force_var_cost (data
, base
, NULL
);
4968 /* It will be exceptional that the iv register happens to be initialized with
4969 the proper value at no cost. In general, there will at least be a regcopy
4971 if (cost_base
.cost
== 0)
4972 cost_base
.cost
= COSTS_N_INSNS (1);
4973 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
4975 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
4977 /* Prefer the original ivs unless we may gain something by replacing it.
4978 The reason is to make debugging simpler; so this is not relevant for
4979 artificial ivs created by other optimization passes. */
4980 if (cand
->pos
!= IP_ORIGINAL
4981 || !SSA_NAME_VAR (cand
->var_before
)
4982 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4985 /* Prefer not to insert statements into latch unless there are some
4986 already (so that we do not create unnecessary jumps). */
4987 if (cand
->pos
== IP_END
4988 && empty_block_p (ip_end_pos (data
->current_loop
)))
4992 cand
->cost_step
= cost_step
;
4995 /* Determines costs of computation of the candidates. */
4998 determine_iv_costs (struct ivopts_data
*data
)
5002 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5004 fprintf (dump_file
, "Candidate costs:\n");
5005 fprintf (dump_file
, " cand\tcost\n");
5008 for (i
= 0; i
< n_iv_cands (data
); i
++)
5010 struct iv_cand
*cand
= iv_cand (data
, i
);
5012 determine_iv_cost (data
, cand
);
5014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5015 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5019 fprintf (dump_file
, "\n");
5022 /* Calculates cost for having SIZE induction variables. */
5025 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5027 /* We add size to the cost, so that we prefer eliminating ivs
5029 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5030 data
->body_includes_call
);
5033 /* For each size of the induction variable set determine the penalty. */
5036 determine_set_costs (struct ivopts_data
*data
)
5040 gimple_stmt_iterator psi
;
5042 struct loop
*loop
= data
->current_loop
;
5045 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5047 fprintf (dump_file
, "Global costs:\n");
5048 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5049 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5050 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5051 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5055 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5057 phi
= gsi_stmt (psi
);
5058 op
= PHI_RESULT (phi
);
5060 if (virtual_operand_p (op
))
5063 if (get_iv (data
, op
))
5069 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5071 struct version_info
*info
= ver_info (data
, j
);
5073 if (info
->inv_id
&& info
->has_nonlin_use
)
5077 data
->regs_used
= n
;
5078 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5079 fprintf (dump_file
, " regs_used %d\n", n
);
5081 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5083 fprintf (dump_file
, " cost for size:\n");
5084 fprintf (dump_file
, " ivs\tcost\n");
5085 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5086 fprintf (dump_file
, " %d\t%d\n", j
,
5087 ivopts_global_cost_for_size (data
, j
));
5088 fprintf (dump_file
, "\n");
5092 /* Returns true if A is a cheaper cost pair than B. */
5095 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5105 cmp
= compare_costs (a
->cost
, b
->cost
);
5112 /* In case the costs are the same, prefer the cheaper candidate. */
5113 if (a
->cand
->cost
< b
->cand
->cost
)
5120 /* Returns candidate by that USE is expressed in IVS. */
5122 static struct cost_pair
*
5123 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5125 return ivs
->cand_for_use
[use
->id
];
5128 /* Computes the cost field of IVS structure. */
5131 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5133 comp_cost cost
= ivs
->cand_use_cost
;
5135 cost
.cost
+= ivs
->cand_cost
;
5137 cost
.cost
+= ivopts_global_cost_for_size (data
,
5138 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5143 /* Remove invariants in set INVS to set IVS. */
5146 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5154 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5156 ivs
->n_invariant_uses
[iid
]--;
5157 if (ivs
->n_invariant_uses
[iid
] == 0)
5162 /* Set USE not to be expressed by any candidate in IVS. */
5165 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5168 unsigned uid
= use
->id
, cid
;
5169 struct cost_pair
*cp
;
5171 cp
= ivs
->cand_for_use
[uid
];
5177 ivs
->cand_for_use
[uid
] = NULL
;
5178 ivs
->n_cand_uses
[cid
]--;
5180 if (ivs
->n_cand_uses
[cid
] == 0)
5182 bitmap_clear_bit (ivs
->cands
, cid
);
5183 /* Do not count the pseudocandidates. */
5187 ivs
->cand_cost
-= cp
->cand
->cost
;
5189 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5192 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5194 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5196 if (cp
->inv_expr_id
!= -1)
5198 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5199 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5200 ivs
->num_used_inv_expr
--;
5202 iv_ca_recount_cost (data
, ivs
);
5205 /* Add invariants in set INVS to set IVS. */
5208 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5216 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5218 ivs
->n_invariant_uses
[iid
]++;
5219 if (ivs
->n_invariant_uses
[iid
] == 1)
5224 /* Set cost pair for USE in set IVS to CP. */
5227 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5228 struct iv_use
*use
, struct cost_pair
*cp
)
5230 unsigned uid
= use
->id
, cid
;
5232 if (ivs
->cand_for_use
[uid
] == cp
)
5235 if (ivs
->cand_for_use
[uid
])
5236 iv_ca_set_no_cp (data
, ivs
, use
);
5243 ivs
->cand_for_use
[uid
] = cp
;
5244 ivs
->n_cand_uses
[cid
]++;
5245 if (ivs
->n_cand_uses
[cid
] == 1)
5247 bitmap_set_bit (ivs
->cands
, cid
);
5248 /* Do not count the pseudocandidates. */
5252 ivs
->cand_cost
+= cp
->cand
->cost
;
5254 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5257 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5258 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5260 if (cp
->inv_expr_id
!= -1)
5262 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5263 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5264 ivs
->num_used_inv_expr
++;
5266 iv_ca_recount_cost (data
, ivs
);
5270 /* Extend set IVS by expressing USE by some of the candidates in it
5271 if possible. All important candidates will be considered
5272 if IMPORTANT_CANDIDATES is true. */
5275 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5276 struct iv_use
*use
, bool important_candidates
)
5278 struct cost_pair
*best_cp
= NULL
, *cp
;
5283 gcc_assert (ivs
->upto
>= use
->id
);
5285 if (ivs
->upto
== use
->id
)
5291 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5292 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5294 struct iv_cand
*cand
= iv_cand (data
, i
);
5296 cp
= get_use_iv_cost (data
, use
, cand
);
5298 if (cheaper_cost_pair (cp
, best_cp
))
5302 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5305 /* Get cost for assignment IVS. */
5308 iv_ca_cost (struct iv_ca
*ivs
)
5310 /* This was a conditional expression but it triggered a bug in
5313 return infinite_cost
;
5318 /* Returns true if all dependences of CP are among invariants in IVS. */
5321 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5326 if (!cp
->depends_on
)
5329 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5331 if (ivs
->n_invariant_uses
[i
] == 0)
5338 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5339 it before NEXT_CHANGE. */
5341 static struct iv_ca_delta
*
5342 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5343 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5345 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5348 change
->old_cp
= old_cp
;
5349 change
->new_cp
= new_cp
;
5350 change
->next_change
= next_change
;
5355 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5358 static struct iv_ca_delta
*
5359 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5361 struct iv_ca_delta
*last
;
5369 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5371 last
->next_change
= l2
;
5376 /* Reverse the list of changes DELTA, forming the inverse to it. */
5378 static struct iv_ca_delta
*
5379 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5381 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5382 struct cost_pair
*tmp
;
5384 for (act
= delta
; act
; act
= next
)
5386 next
= act
->next_change
;
5387 act
->next_change
= prev
;
5391 act
->old_cp
= act
->new_cp
;
5398 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5399 reverted instead. */
5402 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5403 struct iv_ca_delta
*delta
, bool forward
)
5405 struct cost_pair
*from
, *to
;
5406 struct iv_ca_delta
*act
;
5409 delta
= iv_ca_delta_reverse (delta
);
5411 for (act
= delta
; act
; act
= act
->next_change
)
5415 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5416 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5420 iv_ca_delta_reverse (delta
);
5423 /* Returns true if CAND is used in IVS. */
5426 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5428 return ivs
->n_cand_uses
[cand
->id
] > 0;
5431 /* Returns number of induction variable candidates in the set IVS. */
5434 iv_ca_n_cands (struct iv_ca
*ivs
)
5436 return ivs
->n_cands
;
5439 /* Free the list of changes DELTA. */
5442 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5444 struct iv_ca_delta
*act
, *next
;
5446 for (act
= *delta
; act
; act
= next
)
5448 next
= act
->next_change
;
5455 /* Allocates new iv candidates assignment. */
5457 static struct iv_ca
*
5458 iv_ca_new (struct ivopts_data
*data
)
5460 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5464 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5465 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5466 nw
->cands
= BITMAP_ALLOC (NULL
);
5469 nw
->cand_use_cost
= no_cost
;
5471 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5473 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5474 nw
->num_used_inv_expr
= 0;
5479 /* Free memory occupied by the set IVS. */
5482 iv_ca_free (struct iv_ca
**ivs
)
5484 free ((*ivs
)->cand_for_use
);
5485 free ((*ivs
)->n_cand_uses
);
5486 BITMAP_FREE ((*ivs
)->cands
);
5487 free ((*ivs
)->n_invariant_uses
);
5488 free ((*ivs
)->used_inv_expr
);
5493 /* Dumps IVS to FILE. */
5496 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5498 const char *pref
= " invariants ";
5500 comp_cost cost
= iv_ca_cost (ivs
);
5502 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5503 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5504 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5505 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5507 for (i
= 0; i
< ivs
->upto
; i
++)
5509 struct iv_use
*use
= iv_use (data
, i
);
5510 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5512 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5513 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5515 fprintf (file
, " use:%d --> ??\n", use
->id
);
5518 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5519 if (ivs
->n_invariant_uses
[i
])
5521 fprintf (file
, "%s%d", pref
, i
);
5524 fprintf (file
, "\n\n");
5527 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5528 new set, and store differences in DELTA. Number of induction variables
5529 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5530 the function will try to find a solution with mimimal iv candidates. */
5533 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5534 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5535 unsigned *n_ivs
, bool min_ncand
)
5540 struct cost_pair
*old_cp
, *new_cp
;
5543 for (i
= 0; i
< ivs
->upto
; i
++)
5545 use
= iv_use (data
, i
);
5546 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5549 && old_cp
->cand
== cand
)
5552 new_cp
= get_use_iv_cost (data
, use
, cand
);
5556 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5559 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5562 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5565 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5566 cost
= iv_ca_cost (ivs
);
5568 *n_ivs
= iv_ca_n_cands (ivs
);
5569 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5574 /* Try narrowing set IVS by removing CAND. Return the cost of
5575 the new set and store the differences in DELTA. */
5578 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5579 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5583 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5585 struct iv_cand
*cnd
;
5589 for (i
= 0; i
< n_iv_uses (data
); i
++)
5591 use
= iv_use (data
, i
);
5593 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5594 if (old_cp
->cand
!= cand
)
5599 if (data
->consider_all_candidates
)
5601 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5606 cnd
= iv_cand (data
, ci
);
5608 cp
= get_use_iv_cost (data
, use
, cnd
);
5612 if (!iv_ca_has_deps (ivs
, cp
))
5615 if (!cheaper_cost_pair (cp
, new_cp
))
5623 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5628 cnd
= iv_cand (data
, ci
);
5630 cp
= get_use_iv_cost (data
, use
, cnd
);
5633 if (!iv_ca_has_deps (ivs
, cp
))
5636 if (!cheaper_cost_pair (cp
, new_cp
))
5645 iv_ca_delta_free (delta
);
5646 return infinite_cost
;
5649 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5652 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5653 cost
= iv_ca_cost (ivs
);
5654 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5659 /* Try optimizing the set of candidates IVS by removing candidates different
5660 from to EXCEPT_CAND from it. Return cost of the new set, and store
5661 differences in DELTA. */
5664 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5665 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5668 struct iv_ca_delta
*act_delta
, *best_delta
;
5670 comp_cost best_cost
, acost
;
5671 struct iv_cand
*cand
;
5674 best_cost
= iv_ca_cost (ivs
);
5676 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5678 cand
= iv_cand (data
, i
);
5680 if (cand
== except_cand
)
5683 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5685 if (compare_costs (acost
, best_cost
) < 0)
5688 iv_ca_delta_free (&best_delta
);
5689 best_delta
= act_delta
;
5692 iv_ca_delta_free (&act_delta
);
5701 /* Recurse to possibly remove other unnecessary ivs. */
5702 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5703 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5704 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5705 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5709 /* Tries to extend the sets IVS in the best possible way in order
5710 to express the USE. If ORIGINALP is true, prefer candidates from
5711 the original set of IVs, otherwise favor important candidates not
5712 based on any memory object. */
5715 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5716 struct iv_use
*use
, bool originalp
)
5718 comp_cost best_cost
, act_cost
;
5721 struct iv_cand
*cand
;
5722 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5723 struct cost_pair
*cp
;
5725 iv_ca_add_use (data
, ivs
, use
, false);
5726 best_cost
= iv_ca_cost (ivs
);
5728 cp
= iv_ca_cand_for_use (ivs
, use
);
5733 iv_ca_add_use (data
, ivs
, use
, true);
5734 best_cost
= iv_ca_cost (ivs
);
5735 cp
= iv_ca_cand_for_use (ivs
, use
);
5739 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5740 iv_ca_set_no_cp (data
, ivs
, use
);
5743 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5744 first try important candidates not based on any memory object. Only if
5745 this fails, try the specific ones. Rationale -- in loops with many
5746 variables the best choice often is to use just one generic biv. If we
5747 added here many ivs specific to the uses, the optimization algorithm later
5748 would be likely to get stuck in a local minimum, thus causing us to create
5749 too many ivs. The approach from few ivs to more seems more likely to be
5750 successful -- starting from few ivs, replacing an expensive use by a
5751 specific iv should always be a win. */
5752 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5754 cand
= iv_cand (data
, i
);
5756 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5759 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5762 if (iv_ca_cand_used_p (ivs
, cand
))
5765 cp
= get_use_iv_cost (data
, use
, cand
);
5769 iv_ca_set_cp (data
, ivs
, use
, cp
);
5770 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5772 iv_ca_set_no_cp (data
, ivs
, use
);
5773 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5775 if (compare_costs (act_cost
, best_cost
) < 0)
5777 best_cost
= act_cost
;
5779 iv_ca_delta_free (&best_delta
);
5780 best_delta
= act_delta
;
5783 iv_ca_delta_free (&act_delta
);
5786 if (infinite_cost_p (best_cost
))
5788 for (i
= 0; i
< use
->n_map_members
; i
++)
5790 cp
= use
->cost_map
+ i
;
5795 /* Already tried this. */
5796 if (cand
->important
)
5798 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5800 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5804 if (iv_ca_cand_used_p (ivs
, cand
))
5808 iv_ca_set_cp (data
, ivs
, use
, cp
);
5809 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5810 iv_ca_set_no_cp (data
, ivs
, use
);
5811 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5814 if (compare_costs (act_cost
, best_cost
) < 0)
5816 best_cost
= act_cost
;
5819 iv_ca_delta_free (&best_delta
);
5820 best_delta
= act_delta
;
5823 iv_ca_delta_free (&act_delta
);
5827 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5828 iv_ca_delta_free (&best_delta
);
5830 return !infinite_cost_p (best_cost
);
5833 /* Finds an initial assignment of candidates to uses. */
5835 static struct iv_ca
*
5836 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5838 struct iv_ca
*ivs
= iv_ca_new (data
);
5841 for (i
= 0; i
< n_iv_uses (data
); i
++)
5842 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5851 /* Tries to improve set of induction variables IVS. */
5854 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5857 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5858 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5859 struct iv_cand
*cand
;
5861 /* Try extending the set of induction variables by one. */
5862 for (i
= 0; i
< n_iv_cands (data
); i
++)
5864 cand
= iv_cand (data
, i
);
5866 if (iv_ca_cand_used_p (ivs
, cand
))
5869 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5873 /* If we successfully added the candidate and the set is small enough,
5874 try optimizing it by removing other candidates. */
5875 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5877 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5878 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5879 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5880 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5883 if (compare_costs (acost
, best_cost
) < 0)
5886 iv_ca_delta_free (&best_delta
);
5887 best_delta
= act_delta
;
5890 iv_ca_delta_free (&act_delta
);
5895 /* Try removing the candidates from the set instead. */
5896 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5898 /* Nothing more we can do. */
5903 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5904 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5905 iv_ca_delta_free (&best_delta
);
5909 /* Attempts to find the optimal set of induction variables. We do simple
5910 greedy heuristic -- we try to replace at most one candidate in the selected
5911 solution and remove the unused ivs while this improves the cost. */
5913 static struct iv_ca
*
5914 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
5918 /* Get the initial solution. */
5919 set
= get_initial_solution (data
, originalp
);
5922 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5923 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5927 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5929 fprintf (dump_file
, "Initial set of candidates:\n");
5930 iv_ca_dump (data
, dump_file
, set
);
5933 while (try_improve_iv_set (data
, set
))
5935 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5937 fprintf (dump_file
, "Improved to:\n");
5938 iv_ca_dump (data
, dump_file
, set
);
5945 static struct iv_ca
*
5946 find_optimal_iv_set (struct ivopts_data
*data
)
5949 struct iv_ca
*set
, *origset
;
5951 comp_cost cost
, origcost
;
5953 /* Determine the cost based on a strategy that starts with original IVs,
5954 and try again using a strategy that prefers candidates not based
5956 origset
= find_optimal_iv_set_1 (data
, true);
5957 set
= find_optimal_iv_set_1 (data
, false);
5959 if (!origset
&& !set
)
5962 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
5963 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
5965 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5967 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
5968 origcost
.cost
, origcost
.complexity
);
5969 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
5970 cost
.cost
, cost
.complexity
);
5973 /* Choose the one with the best cost. */
5974 if (compare_costs (origcost
, cost
) <= 0)
5981 iv_ca_free (&origset
);
5983 for (i
= 0; i
< n_iv_uses (data
); i
++)
5985 use
= iv_use (data
, i
);
5986 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5992 /* Creates a new induction variable corresponding to CAND. */
5995 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5997 gimple_stmt_iterator incr_pos
;
6007 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6011 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6019 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6023 /* Mark that the iv is preserved. */
6024 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6025 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6027 /* Rewrite the increment so that it uses var_before directly. */
6028 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6032 gimple_add_tmp_var (cand
->var_before
);
6034 base
= unshare_expr (cand
->iv
->base
);
6036 create_iv (base
, unshare_expr (cand
->iv
->step
),
6037 cand
->var_before
, data
->current_loop
,
6038 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6041 /* Creates new induction variables described in SET. */
6044 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6047 struct iv_cand
*cand
;
6050 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6052 cand
= iv_cand (data
, i
);
6053 create_new_iv (data
, cand
);
6056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6058 fprintf (dump_file
, "\nSelected IV set: \n");
6059 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6061 cand
= iv_cand (data
, i
);
6062 dump_cand (dump_file
, cand
);
6064 fprintf (dump_file
, "\n");
6068 /* Rewrites USE (definition of iv used in a nonlinear expression)
6069 using candidate CAND. */
6072 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6073 struct iv_use
*use
, struct iv_cand
*cand
)
6078 gimple_stmt_iterator bsi
;
6080 /* An important special case -- if we are asked to express value of
6081 the original iv by itself, just exit; there is no need to
6082 introduce a new computation (that might also need casting the
6083 variable to unsigned and back). */
6084 if (cand
->pos
== IP_ORIGINAL
6085 && cand
->incremented_at
== use
->stmt
)
6087 enum tree_code stmt_code
;
6089 gcc_assert (is_gimple_assign (use
->stmt
));
6090 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6092 /* Check whether we may leave the computation unchanged.
6093 This is the case only if it does not rely on other
6094 computations in the loop -- otherwise, the computation
6095 we rely upon may be removed in remove_unused_ivs,
6096 thus leading to ICE. */
6097 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6098 if (stmt_code
== PLUS_EXPR
6099 || stmt_code
== MINUS_EXPR
6100 || stmt_code
== POINTER_PLUS_EXPR
)
6102 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6103 op
= gimple_assign_rhs2 (use
->stmt
);
6104 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6105 op
= gimple_assign_rhs1 (use
->stmt
);
6112 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6116 comp
= get_computation (data
->current_loop
, use
, cand
);
6117 gcc_assert (comp
!= NULL_TREE
);
6119 switch (gimple_code (use
->stmt
))
6122 tgt
= PHI_RESULT (use
->stmt
);
6124 /* If we should keep the biv, do not replace it. */
6125 if (name_info (data
, tgt
)->preserve_biv
)
6128 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6132 tgt
= gimple_assign_lhs (use
->stmt
);
6133 bsi
= gsi_for_stmt (use
->stmt
);
6140 if (!valid_gimple_rhs_p (comp
)
6141 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6142 /* We can't allow re-allocating the stmt as it might be pointed
6144 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6145 >= gimple_num_ops (gsi_stmt (bsi
)))))
6147 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6148 true, GSI_SAME_STMT
);
6149 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6151 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6152 /* As this isn't a plain copy we have to reset alignment
6154 if (SSA_NAME_PTR_INFO (comp
))
6155 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6159 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6161 ass
= gimple_build_assign (tgt
, comp
);
6162 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6164 bsi
= gsi_for_stmt (use
->stmt
);
6165 remove_phi_node (&bsi
, false);
6169 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6170 use
->stmt
= gsi_stmt (bsi
);
6174 /* Performs a peephole optimization to reorder the iv update statement with
6175 a mem ref to enable instruction combining in later phases. The mem ref uses
6176 the iv value before the update, so the reordering transformation requires
6177 adjustment of the offset. CAND is the selected IV_CAND.
6181 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6189 directly propagating t over to (1) will introduce overlapping live range
6190 thus increase register pressure. This peephole transform it into:
6194 t = MEM_REF (base, iv2, 8, 8);
6201 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6204 gimple iv_update
, stmt
;
6206 gimple_stmt_iterator gsi
, gsi_iv
;
6208 if (cand
->pos
!= IP_NORMAL
)
6211 var_after
= cand
->var_after
;
6212 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6214 bb
= gimple_bb (iv_update
);
6215 gsi
= gsi_last_nondebug_bb (bb
);
6216 stmt
= gsi_stmt (gsi
);
6218 /* Only handle conditional statement for now. */
6219 if (gimple_code (stmt
) != GIMPLE_COND
)
6222 gsi_prev_nondebug (&gsi
);
6223 stmt
= gsi_stmt (gsi
);
6224 if (stmt
!= iv_update
)
6227 gsi_prev_nondebug (&gsi
);
6228 if (gsi_end_p (gsi
))
6231 stmt
= gsi_stmt (gsi
);
6232 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6235 if (stmt
!= use
->stmt
)
6238 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6243 fprintf (dump_file
, "Reordering \n");
6244 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6245 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6246 fprintf (dump_file
, "\n");
6249 gsi
= gsi_for_stmt (use
->stmt
);
6250 gsi_iv
= gsi_for_stmt (iv_update
);
6251 gsi_move_before (&gsi_iv
, &gsi
);
6253 cand
->pos
= IP_BEFORE_USE
;
6254 cand
->incremented_at
= use
->stmt
;
6257 /* Rewrites USE (address that is an iv) using candidate CAND. */
6260 rewrite_use_address (struct ivopts_data
*data
,
6261 struct iv_use
*use
, struct iv_cand
*cand
)
6264 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6265 tree base_hint
= NULL_TREE
;
6269 adjust_iv_update_pos (cand
, use
);
6270 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6272 unshare_aff_combination (&aff
);
6274 /* To avoid undefined overflow problems, all IV candidates use unsigned
6275 integer types. The drawback is that this makes it impossible for
6276 create_mem_ref to distinguish an IV that is based on a memory object
6277 from one that represents simply an offset.
6279 To work around this problem, we pass a hint to create_mem_ref that
6280 indicates which variable (if any) in aff is an IV based on a memory
6281 object. Note that we only consider the candidate. If this is not
6282 based on an object, the base of the reference is in some subexpression
6283 of the use -- but these will use pointer types, so they are recognized
6284 by the create_mem_ref heuristics anyway. */
6285 if (cand
->iv
->base_object
)
6286 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6288 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6289 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6290 reference_alias_ptr_type (*use
->op_p
),
6291 iv
, base_hint
, data
->speed
);
6292 copy_ref_info (ref
, *use
->op_p
);
6296 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6300 rewrite_use_compare (struct ivopts_data
*data
,
6301 struct iv_use
*use
, struct iv_cand
*cand
)
6303 tree comp
, *var_p
, op
, bound
;
6304 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6305 enum tree_code compare
;
6306 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6312 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6313 tree var_type
= TREE_TYPE (var
);
6316 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6318 fprintf (dump_file
, "Replacing exit test: ");
6319 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6322 bound
= unshare_expr (fold_convert (var_type
, bound
));
6323 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6325 gsi_insert_seq_on_edge_immediate (
6326 loop_preheader_edge (data
->current_loop
),
6329 gimple_cond_set_lhs (use
->stmt
, var
);
6330 gimple_cond_set_code (use
->stmt
, compare
);
6331 gimple_cond_set_rhs (use
->stmt
, op
);
6335 /* The induction variable elimination failed; just express the original
6337 comp
= get_computation (data
->current_loop
, use
, cand
);
6338 gcc_assert (comp
!= NULL_TREE
);
6340 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6343 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6344 true, GSI_SAME_STMT
);
6347 /* Rewrites USE using candidate CAND. */
6350 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6354 case USE_NONLINEAR_EXPR
:
6355 rewrite_use_nonlinear_expr (data
, use
, cand
);
6359 rewrite_use_address (data
, use
, cand
);
6363 rewrite_use_compare (data
, use
, cand
);
6370 update_stmt (use
->stmt
);
6373 /* Rewrite the uses using the selected induction variables. */
6376 rewrite_uses (struct ivopts_data
*data
)
6379 struct iv_cand
*cand
;
6382 for (i
= 0; i
< n_iv_uses (data
); i
++)
6384 use
= iv_use (data
, i
);
6385 cand
= use
->selected
;
6388 rewrite_use (data
, use
, cand
);
6392 /* Removes the ivs that are not used after rewriting. */
6395 remove_unused_ivs (struct ivopts_data
*data
)
6399 bitmap toremove
= BITMAP_ALLOC (NULL
);
6401 /* Figure out an order in which to release SSA DEFs so that we don't
6402 release something that we'd have to propagate into a debug stmt
6404 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6406 struct version_info
*info
;
6408 info
= ver_info (data
, j
);
6410 && !integer_zerop (info
->iv
->step
)
6412 && !info
->iv
->have_use_for
6413 && !info
->preserve_biv
)
6415 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6417 tree def
= info
->iv
->ssa_name
;
6419 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6421 imm_use_iterator imm_iter
;
6422 use_operand_p use_p
;
6426 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6428 if (!gimple_debug_bind_p (stmt
))
6431 /* We just want to determine whether to do nothing
6432 (count == 0), to substitute the computed
6433 expression into a single use of the SSA DEF by
6434 itself (count == 1), or to use a debug temp
6435 because the SSA DEF is used multiple times or as
6436 part of a larger expression (count > 1). */
6438 if (gimple_debug_bind_get_value (stmt
) != def
)
6442 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6448 struct iv_use dummy_use
;
6449 struct iv_cand
*best_cand
= NULL
, *cand
;
6450 unsigned i
, best_pref
= 0, cand_pref
;
6452 memset (&dummy_use
, 0, sizeof (dummy_use
));
6453 dummy_use
.iv
= info
->iv
;
6454 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6456 cand
= iv_use (data
, i
)->selected
;
6457 if (cand
== best_cand
)
6459 cand_pref
= operand_equal_p (cand
->iv
->step
,
6463 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6464 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6467 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6469 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6472 best_pref
= cand_pref
;
6479 tree comp
= get_computation_at (data
->current_loop
,
6480 &dummy_use
, best_cand
,
6481 SSA_NAME_DEF_STMT (def
));
6487 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6488 DECL_ARTIFICIAL (vexpr
) = 1;
6489 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6490 if (SSA_NAME_VAR (def
))
6491 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6493 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6494 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6495 gimple_stmt_iterator gsi
;
6497 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6498 gsi
= gsi_after_labels (gimple_bb
6499 (SSA_NAME_DEF_STMT (def
)));
6501 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6503 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6507 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6509 if (!gimple_debug_bind_p (stmt
))
6512 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6513 SET_USE (use_p
, comp
);
6521 release_defs_bitset (toremove
);
6523 BITMAP_FREE (toremove
);
6526 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6527 for pointer_map_traverse. */
6530 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6531 void *data ATTRIBUTE_UNUSED
)
6533 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6539 /* Frees data allocated by the optimization of a single loop. */
6542 free_loop_data (struct ivopts_data
*data
)
6550 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6551 pointer_map_destroy (data
->niters
);
6552 data
->niters
= NULL
;
6555 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6557 struct version_info
*info
;
6559 info
= ver_info (data
, i
);
6562 info
->has_nonlin_use
= false;
6563 info
->preserve_biv
= false;
6566 bitmap_clear (data
->relevant
);
6567 bitmap_clear (data
->important_candidates
);
6569 for (i
= 0; i
< n_iv_uses (data
); i
++)
6571 struct iv_use
*use
= iv_use (data
, i
);
6574 BITMAP_FREE (use
->related_cands
);
6575 for (j
= 0; j
< use
->n_map_members
; j
++)
6576 if (use
->cost_map
[j
].depends_on
)
6577 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6578 free (use
->cost_map
);
6581 data
->iv_uses
.truncate (0);
6583 for (i
= 0; i
< n_iv_cands (data
); i
++)
6585 struct iv_cand
*cand
= iv_cand (data
, i
);
6588 if (cand
->depends_on
)
6589 BITMAP_FREE (cand
->depends_on
);
6592 data
->iv_candidates
.truncate (0);
6594 if (data
->version_info_size
< num_ssa_names
)
6596 data
->version_info_size
= 2 * num_ssa_names
;
6597 free (data
->version_info
);
6598 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6601 data
->max_inv_id
= 0;
6603 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6604 SET_DECL_RTL (obj
, NULL_RTX
);
6606 decl_rtl_to_reset
.truncate (0);
6608 htab_empty (data
->inv_expr_tab
);
6609 data
->inv_expr_id
= 0;
6612 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6616 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6618 free_loop_data (data
);
6619 free (data
->version_info
);
6620 BITMAP_FREE (data
->relevant
);
6621 BITMAP_FREE (data
->important_candidates
);
6623 decl_rtl_to_reset
.release ();
6624 data
->iv_uses
.release ();
6625 data
->iv_candidates
.release ();
6626 htab_delete (data
->inv_expr_tab
);
6629 /* Returns true if the loop body BODY includes any function calls. */
6632 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6634 gimple_stmt_iterator gsi
;
6637 for (i
= 0; i
< num_nodes
; i
++)
6638 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6640 gimple stmt
= gsi_stmt (gsi
);
6641 if (is_gimple_call (stmt
)
6642 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6648 /* Optimizes the LOOP. Returns true if anything changed. */
6651 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6653 bool changed
= false;
6654 struct iv_ca
*iv_ca
;
6655 edge exit
= single_dom_exit (loop
);
6658 gcc_assert (!data
->niters
);
6659 data
->current_loop
= loop
;
6660 data
->speed
= optimize_loop_for_speed_p (loop
);
6662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6664 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6668 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6669 exit
->src
->index
, exit
->dest
->index
);
6670 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6671 fprintf (dump_file
, "\n");
6674 fprintf (dump_file
, "\n");
6677 body
= get_loop_body (loop
);
6678 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6679 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6682 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6684 /* For each ssa name determines whether it behaves as an induction variable
6686 if (!find_induction_variables (data
))
6689 /* Finds interesting uses (item 1). */
6690 find_interesting_uses (data
);
6691 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6694 /* Finds candidates for the induction variables (item 2). */
6695 find_iv_candidates (data
);
6697 /* Calculates the costs (item 3, part 1). */
6698 determine_iv_costs (data
);
6699 determine_use_iv_costs (data
);
6700 determine_set_costs (data
);
6702 /* Find the optimal set of induction variables (item 3, part 2). */
6703 iv_ca
= find_optimal_iv_set (data
);
6708 /* Create the new induction variables (item 4, part 1). */
6709 create_new_ivs (data
, iv_ca
);
6710 iv_ca_free (&iv_ca
);
6712 /* Rewrite the uses (item 4, part 2). */
6713 rewrite_uses (data
);
6715 /* Remove the ivs that are unused after rewriting. */
6716 remove_unused_ivs (data
);
6718 /* We have changed the structure of induction variables; it might happen
6719 that definitions in the scev database refer to some of them that were
6724 free_loop_data (data
);
6729 /* Main entry point. Optimizes induction variables in loops. */
6732 tree_ssa_iv_optimize (void)
6735 struct ivopts_data data
;
6738 tree_ssa_iv_optimize_init (&data
);
6740 /* Optimize the loops starting with the innermost ones. */
6741 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6743 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6744 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6746 tree_ssa_iv_optimize_loop (&data
, loop
);
6749 tree_ssa_iv_optimize_finalize (&data
);