/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
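
/* As a concrete illustration of the intended effect (a sketch only, not
   part of the algorithm description above): strength reduction rewrites
   an indexed access such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   into pointer arithmetic along the lines of

       for (p = a; p < a + n; p++)
	 *p = 0;

   so that the multiplication hidden in the address computation
   a + i * sizeof (*a) is replaced by a cheap pointer increment.  */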
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use_pos;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
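
/* For instance (an illustrative sketch, not an exhaustive description):
   for EXPR of the form &a[i], the base address &a is recovered and
   returned converted to ptr_type_node, while for an integer constant
   the function returns NULL_TREE, meaning no base object could be (or
   needs to be) determined.  */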
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
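
/* A biv found by the code above typically comes from GIMPLE of the
   following shape (an illustrative sketch):

       # i_1 = PHI <0(preheader), i_7(latch)>
       ...
       i_7 = i_1 + 1;

   Here the phi result i_1 is the biv, the base is the value 0 flowing
   in over the preheader edge, and the step is the constant 1 determined
   by determine_biv_step via simple_iv.  */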
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
1241 static struct iv_use
*
1242 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1249 if (TREE_CODE (op
) != SSA_NAME
)
1252 iv
= get_iv (data
, op
);
1256 if (iv
->have_use_for
)
1258 use
= iv_use (data
, iv
->use_id
);
1260 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1264 if (integer_zerop (iv
->step
))
1266 record_invariant (data
, op
, true);
1269 iv
->have_use_for
= true;
1271 civ
= XNEW (struct iv
);
1274 stmt
= SSA_NAME_DEF_STMT (op
);
1275 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1276 || is_gimple_assign (stmt
));
1278 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1279 iv
->use_id
= use
->id
;
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
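
/* Illustrative examples (assuming the operands fit in a HOST_WIDE_INT):
   for TOP = i_1 * 6 and BOT = i_1 the function stores 6 to *MUL and
   returns true; for TOP = 24 and BOT = 8 it stores 3; for TOP = 25 and
   BOT = 8 the division leaves a nonzero remainder, so it returns
   false.  */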
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
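
/* For example (an illustrative, hypothetical case): for EXPR = &a[4]
   with 4-byte array elements, strip_offset returns the address with the
   constant displacement removed and stores 16 to *OFFSET, so that
   candidates based on the stripped address can serve several uses that
   differ only in a constant offset.  */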
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
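
/* E.g. generic_type_for (int) is unsigned int, and a pointer type is
   likewise mapped to the unsigned integer type of the same precision;
   this makes arithmetic on the candidate ivs wrap safely instead of
   invoking undefined overflow behavior.  */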
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
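
/* As a sketch of the intent: on a target with post-increment addressing,
   a loop such as

       for (i = 0; i < n; i++)
	 *p++ = 0;

   can use an IP_AFTER_USE candidate, so that the address iv is
   incremented by the memory access itself and the separate increment
   statement becomes unnecessary.  */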
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that taking the modulo by it
             is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
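
/* Illustrative sketch (not part of the pass, hypothetical numbers): with
   five related candidates, the loop above yields size == 8, the next power
   of two.  A power-of-two table size lets the lookup functions below
   replace a division by a mask:

     s = cand->id & (8 - 1);     is the same as     s = cand->id % 8;

   since for a power of two the low bits are exactly the remainder.  */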
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
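
/* A minimal usage sketch of the comp_cost arithmetic above (hypothetical
   values): the runtime part dominates and complexity only breaks ties.

     comp_cost a = new_cost (4, 1);
     comp_cost b = new_cost (4, 2);
     comp_cost c = new_cost (5, 0);

   Then compare_costs (a, b) < 0 (equal runtime, fewer address parts wins)
   and compare_costs (b, c) < 0 (lower runtime wins regardless of
   complexity).  */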
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
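
/* Illustrative example of the probing above (hypothetical numbers): with
   n_map_members == 8 and cand->id == 13, the home slot is 13 & 7 == 5.
   If slot 5 is occupied, the linear probe scans slots 6 and 7 and then
   wraps around to 0 .. 4; get_use_iv_cost below retraces the same order
   when looking the pair up again.  */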
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
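
/* A worked instance of the rewrite above (hypothetical loop): for a use
   with iv {base = &a[1], step = 4} and a candidate with iv {base = 0,
   step = 1}, constant_multiple_of gives rat == 4, so the use is rebuilt
   in decomposed affine form as

     use = &a[1] - 4 * 0 + 4 * var = &a[1] + 4 * var

   with one extra candidate step folded into the cbase part when AT is
   after the increment.  */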
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
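
/* For example (hypothetical numbers): a setup cost of 10 in a loop with
   avg_loop_niter == 5 is accounted as 10 / 5 == 2 per iteration when
   optimizing for speed, but stays 10 when optimizing for size.  */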
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
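
/* Illustrative queries (target-dependent; shown for a hypothetical
   x86-like target whose addresses allow index scales 1, 2, 4 and 8):

     multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC)
       -> true
     multiplier_allowed_in_address_p (3, SImode, ADDR_SPACE_GENERIC)
       -> false
     multiplier_allowed_in_address_p (256, SImode, ADDR_SPACE_GENERIC)
       -> false, since 256 exceeds MAX_RATIO

   The answers are probed once per (mode, address space) pair and cached
   in VALID_MULT_LIST.  */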
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;


static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
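
/* Sketch of how the cached table is consulted (hypothetical numbers): for
   an address of shape sym + var + 4 + 2 * index, with 4 inside
   [min_offset, max_offset] and 2 an allowed multiplier, the lookup is
   data->costs[1][1][1][1] and the complexity is 4.  If the ratio is not
   allowed, the multiplication is paid for separately through
   mult_by_coeff_cost and the [.][.][.][0] entry is used instead.  */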
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;

  if (!(m >= 0 && m < maxm))
    return false;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (mult == op1
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, mult == op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
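
/* For example (hypothetical expression): a + b * 8 has m == 3, so on
   targets where shift-and-add is cheap the returned cost is
   shiftadd_cost (speed, mode, 3) plus the cost of the non-multiply
   operand, instead of a full multiply followed by an add.  */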
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = no_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = no_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = no_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = no_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
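
/* For instance (hypothetical candidate): for CAND with iv {base = b,
   step = s}, the value after NITER iterations is built in affine form as

     val = b + s * niter          (before the increment), or
     val = b + s * (niter + 1)    (after it),

   which is exactly what may_eliminate_iv compares the exit bound
   against below.  */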
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
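
/* A worked instance of the formula above: for a 32-bit iv with step == 4,
   num_ending_zeros is 2, so the period is (2^32 >> 2) - 1 == 0x3fffffff --
   the iv can be incremented that many times before any value repeats.  */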
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);

  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;

      /* Fallthru.  */
    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

         Also, we only look for the first addition in the computation of BASE.
         More complex analysis would be better, but introducing it just for
         this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
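
/* For example (hypothetical GIMPLE): if BASE is defined as
   base = start_5 + n_7 in a non-wrapping type and OFFSET is n_7, the
   subtraction BASE - OFFSET merely undoes the addition, so it cannot
   overflow and the function returns true.  */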
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will only be computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
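
/* A worked example of the choice above, with invented numbers: suppose
   eliminating the exit test needs the bound "base + n" forced into a
   register (force_var_cost, say 8), but only once per loop, so
   adjust_setup_cost scales it down to ~1; expressing the original giv
   costs an add per iteration (say 4) plus the original bound.  Then
   elim_cost (1) beats express_cost (4+), and BOUND/COMP are recorded so
   the exit condition can later be rewritten by rewrite_use_compare.  */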
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
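
/* What autoincrement means here, assuming a target with post-increment
   addressing such as ARM: if the candidate's increment immediately
   follows the address use,

     t = *p;
     p = p + 4;

   the two may later combine into a single instruction, roughly

     ldr   r0, [r1], #4    @ load, then post-increment the base

   in which case get_computation_cost sets CAN_AUTOINC and the increment
   is effectively free for this use/candidate pair.  */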
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
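
/* A small worked example with invented target numbers: if forcing BASE
   into a register costs COSTS_N_INSNS (1) = 4 and an add in BASE's mode
   costs 4, then cost = 4 + adjust_setup_cost (data, 4); since the setup
   cost is scaled down by the expected trip count when optimizing for
   speed, a loop that rolls even a few times pays mostly for the
   per-iteration increment, as the comment above intends.  */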
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
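
/* For illustration, with invented numbers: regs_used = 2 and size = 3
   yield 3 + estimate_reg_pressure_cost (3, 2, speed, call_p) -- a small
   penalty while all five values still fit in the target's registers, but
   one that grows sharply once target_avail_regs is exceeded and spill
   costs are factored in.  This is what makes large candidate sets
   unattractive.  */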
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
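
/* Written out as an equation, the total being maintained is

     ivs->cost = ivs->cand_use_cost      (best cost pair of every use)
               + ivs->cand_cost          (cost of the candidates themselves)
               + ivopts_global_cost_for_size (data,
                                              ivs->n_regs
                                              + ivs->num_used_inv_expr);

   e.g. cand_use_cost 12, cand_cost 8 and a size penalty of 5 (numbers
   invented for illustration) recount to a set cost of 25.  */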
/* Remove the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Add the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
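
/* A sketch of the inversion on a two-element delta (uses u1 and u2 are
   hypothetical):

     input:   [u1: A -> B] -> [u2: C -> D] -> NULL
     output:  [u2: D -> C] -> [u1: B -> A] -> NULL

   Both the chain order and each old_cp/new_cp pair are flipped, so
   committing the result undoes exactly what committing the original
   delta did; iv_ca_delta_commit relies on this for its rollbacks.  */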
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
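
/* To make the rationale above concrete, consider an illustrative loop

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   All three address uses are expressible from the single generic biv "i"
   at moderate cost.  Seeding the initial solution with three separate
   pointer ivs instead tends to trap the later greedy search in that
   local minimum, even when one biv plus the target's addressing modes
   would be cheaper overall.  */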
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
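
/* The shape of the resulting dump output, with invented numbers:

     Initial set of candidates:
       cost: 29 (complexity 2)
       candidates: 1, 5
     Improved to:
       cost: 23 (complexity 1)
       candidates: 5

   Each successful try_improve_iv_set either adds one candidate (possibly
   pruning others afterwards) or only prunes, and the loop stops at the
   first iteration that cannot lower the cost.  */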
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
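
/* For an IP_NORMAL candidate the result is, schematically, GIMPLE of the
   following shape (SSA names are illustrative):

     # ivtmp.5_1 = PHI <0 (preheader), ivtmp.5_2 (latch)>
     ...
     ivtmp.5_2 = ivtmp.5_1 + 4;   <-- increment at ip_normal_pos
     if (...) goto latch; else goto exit;

   create_iv builds both the phi node and the increment statement, and
   returns the SSA names valid before and after the increment in
   cand->var_before and cand->var_after.  */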
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
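
/* A before/after sketch of such a rewrite (SSA names invented).  A use

     x_3 = i_1 * 2;        // i is an iv with step 1

   based on a selected candidate ivtmp with the same start and step 8
   could conceptually become

     x_3 = ivtmp_7 / 4;    // same value, derived from the new iv

   get_computation builds the equivalent expression; the code above only
   decides where and how to materialize it without invalidating the
   statement being rewritten.  */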
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
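
/* Example of the debug handling above (made-up SSA names): when a removed
   iv i_1 appears in more than one debug bind, or inside a larger
   expression, a debug temp is introduced:

     # DEBUG D#1 => ivtmp_7 * 4
     # DEBUG i => D#1

   so the user variable "i" remains printable in the debugger although
   i_1 and its increments no longer exist in the IL.  */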
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}