1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
103 tree base
; /* Initial value of the iv. */
104 tree base_object
; /* A memory object to that the induction variable points. */
105 tree step
; /* Step of the iv (constant only). */
106 tree ssa_name
; /* The ssa name with the value. */
107 bool biv_p
; /* Is it a biv? */
108 bool have_use_for
; /* Do we already have a use for it? */
109 unsigned use_id
; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
115 tree name
; /* The ssa name. */
116 struct iv
*iv
; /* Induction variable description. */
117 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id
; /* Id of an invariant. */
120 bool preserve_biv
; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
126 struct tree_niter_desc niter
;
127 /* Number of iterations. */
129 unsigned regs_used
; /* Number of registers used. */
135 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
136 USE_OUTER
, /* The induction variable is used outside the loop. */
137 USE_ADDRESS
, /* Use in an address. */
138 USE_COMPARE
/* Use is a compare. */
141 /* The candidate - cost pair. */
144 struct iv_cand
*cand
; /* The candidate. */
145 unsigned cost
; /* The cost. */
146 bitmap depends_on
; /* The list of invariants that have to be
153 unsigned id
; /* The id of the use. */
154 enum use_type type
; /* Type of the use. */
155 struct iv
*iv
; /* The induction variable it is based on. */
156 tree stmt
; /* Statement in that it occurs. */
157 tree
*op_p
; /* The place where it occurs. */
158 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
161 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
162 struct cost_pair
*cost_map
;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand
*selected
;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
172 IP_NORMAL
, /* At the end, just before the exit condition. */
173 IP_END
, /* At the end of the latch block. */
174 IP_ORIGINAL
/* The original biv. */
177 /* The induction variable candidate. */
180 unsigned id
; /* The number of the candidate. */
181 bool important
; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos
; /* Where it is computed. */
184 tree incremented_at
; /* For original biv, the statement where it is
186 tree var_before
; /* The variable used for it before increment. */
187 tree var_after
; /* The variable used for it after increment. */
188 struct iv
*iv
; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost
; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
199 /* The currently optimized loop. */
200 struct loop
*current_loop
;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size
;
205 /* The array of information for the ssa names. */
206 struct version_info
*version_info
;
208 /* The bitmap of indices in version_info whose value was changed. */
211 /* The maximum invariant id. */
214 /* The uses of induction variables. */
217 /* The candidates. */
218 varray_type iv_candidates
;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates
;
223 /* Whether to consider just related and important candidates when replacing a
225 bool consider_all_candidates
;
228 /* An assignment of iv candidates to uses. */
232 /* The number of uses covered by the assignment. */
235 /* Number of uses that cannot be expressed by the candidates in the set. */
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair
**cand_for_use
;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses
;
244 /* The candidates used. */
247 /* The number of candidates in the set. */
250 /* Total number of registers needed. */
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost
;
256 /* Total cost of candidates. */
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses
;
262 /* Total cost of the assignment. */
266 /* Difference of two iv candidate assignments. */
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair
*old_cp
;
276 /* A new assignment. */
277 struct cost_pair
*new_cp
;
279 /* Next change in the list. */
280 struct iv_ca_delta
*next_change
;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
303 static varray_type decl_rtl_to_reset
;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data
*data
)
310 return VARRAY_ACTIVE_SIZE (data
->iv_uses
);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use
*
316 iv_use (struct ivopts_data
*data
, unsigned i
)
318 return VARRAY_GENERIC_PTR_NOGC (data
->iv_uses
, i
);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data
*data
)
326 return VARRAY_ACTIVE_SIZE (data
->iv_candidates
);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand
*
332 iv_cand (struct ivopts_data
*data
, unsigned i
)
334 return VARRAY_GENERIC_PTR_NOGC (data
->iv_candidates
, i
);
337 /* The data for LOOP. */
339 static inline struct loop_data
*
340 loop_data (struct loop
*loop
)
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
348 single_dom_exit (struct loop
*loop
)
350 edge exit
= loop
->single_exit
;
355 if (!just_once_each_iteration_p (loop
, exit
->src
))
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv
*);
365 dump_iv (FILE *file
, struct iv
*iv
)
369 fprintf (file
, "ssa name ");
370 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
371 fprintf (file
, "\n");
374 fprintf (file
, " type ");
375 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
376 fprintf (file
, "\n");
380 fprintf (file
, " base ");
381 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
382 fprintf (file
, "\n");
384 fprintf (file
, " step ");
385 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
386 fprintf (file
, "\n");
390 fprintf (file
, " invariant ");
391 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
392 fprintf (file
, "\n");
397 fprintf (file
, " base object ");
398 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
399 fprintf (file
, "\n");
403 fprintf (file
, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use
*);
410 dump_use (FILE *file
, struct iv_use
*use
)
412 fprintf (file
, "use %d\n", use
->id
);
416 case USE_NONLINEAR_EXPR
:
417 fprintf (file
, " generic\n");
421 fprintf (file
, " outside\n");
425 fprintf (file
, " address\n");
429 fprintf (file
, " compare\n");
436 fprintf (file
, " in statement ");
437 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
438 fprintf (file
, "\n");
440 fprintf (file
, " at position ");
442 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
443 fprintf (file
, "\n");
445 dump_iv (file
, use
->iv
);
447 if (use
->related_cands
)
449 fprintf (file
, " related candidates ");
450 dump_bitmap (file
, use
->related_cands
);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data
*);
458 dump_uses (FILE *file
, struct ivopts_data
*data
)
463 for (i
= 0; i
< n_iv_uses (data
); i
++)
465 use
= iv_use (data
, i
);
467 dump_use (file
, use
);
468 fprintf (file
, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand
*);
476 dump_cand (FILE *file
, struct iv_cand
*cand
)
478 struct iv
*iv
= cand
->iv
;
480 fprintf (file
, "candidate %d%s\n",
481 cand
->id
, cand
->important
? " (important)" : "");
485 fprintf (file
, " final value replacement\n");
492 fprintf (file
, " incremented before exit test\n");
496 fprintf (file
, " incremented at end\n");
500 fprintf (file
, " original biv\n");
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info
*
510 ver_info (struct ivopts_data
*data
, unsigned ver
)
512 return data
->version_info
+ ver
;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info
*
518 name_info (struct ivopts_data
*data
, tree name
)
520 return ver_info (data
, SSA_NAME_VERSION (name
));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
527 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
530 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
531 unsigned HOST_WIDE_INT inv
, ex
, val
;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a
& 1) && !(b
& 1))
548 /* If b is still even, a is odd and there is no such x. */
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
556 for (i
= 0; i
< bits
- 1; i
++)
558 inv
= (inv
* ex
) & mask
;
559 ex
= (ex
* ex
) & mask
;
562 val
= (a
* inv
) & mask
;
564 gcc_assert (((val
* b
) & mask
) == a
);
566 if ((val
>> (bits
- 1)) & 1)
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
578 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
580 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
584 if (sbb
== loop
->latch
)
590 return stmt
== last_stmt (bb
);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
597 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
599 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
600 basic_block stmt_bb
= bb_for_stmt (stmt
);
601 block_stmt_iterator bsi
;
603 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
606 if (stmt_bb
!= cand_bb
)
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
613 if (bsi_stmt (bsi
) == cand
->incremented_at
)
615 if (bsi_stmt (bsi
) == stmt
)
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
624 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
632 return stmt_after_ip_normal_pos (loop
, stmt
);
635 return stmt_after_ip_original_pos (cand
, stmt
);
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
646 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
650 data
->version_info_size
= 2 * num_ssa_names
;
651 data
->version_info
= xcalloc (data
->version_info_size
,
652 sizeof (struct version_info
));
653 data
->relevant
= BITMAP_XMALLOC ();
654 data
->important_candidates
= BITMAP_XMALLOC ();
655 data
->max_inv_id
= 0;
657 for (i
= 1; i
< loops
->num
; i
++)
658 if (loops
->parray
[i
])
659 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_uses
, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_candidates
, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset
, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
670 determine_base_object (tree expr
)
672 enum tree_code code
= TREE_CODE (expr
);
673 tree base
, obj
, op0
, op1
;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
684 obj
= TREE_OPERAND (expr
, 0);
685 base
= get_base_address (obj
);
688 return fold_convert (ptr_type_node
, expr
);
690 if (TREE_CODE (base
) == INDIRECT_REF
)
691 return fold_convert (ptr_type_node
, TREE_OPERAND (base
, 0));
693 return fold (build1 (ADDR_EXPR
, ptr_type_node
, base
));
697 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
698 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
704 return (code
== PLUS_EXPR
706 : fold (build1 (NEGATE_EXPR
, ptr_type_node
, op1
)));
708 return fold (build (code
, ptr_type_node
, op0
, op1
));
711 return fold_convert (ptr_type_node
, expr
);
715 /* Allocates an induction variable with given initial value BASE and step STEP
719 alloc_iv (tree base
, tree step
)
721 struct iv
*iv
= xcalloc (1, sizeof (struct iv
));
723 if (step
&& integer_zerop (step
))
727 iv
->base_object
= determine_base_object (base
);
730 iv
->have_use_for
= false;
732 iv
->ssa_name
= NULL_TREE
;
737 /* Sets STEP and BASE for induction variable IV. */
740 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
742 struct version_info
*info
= name_info (data
, iv
);
744 gcc_assert (!info
->iv
);
746 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
747 info
->iv
= alloc_iv (base
, step
);
748 info
->iv
->ssa_name
= iv
;
751 /* Finds induction variable declaration for VAR. */
754 get_iv (struct ivopts_data
*data
, tree var
)
758 if (!name_info (data
, var
)->iv
)
760 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
763 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
764 set_iv (data
, var
, var
, NULL_TREE
);
767 return name_info (data
, var
)->iv
;
770 /* Determines the step of a biv defined in PHI. */
773 determine_biv_step (tree phi
)
775 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
776 tree name
= PHI_RESULT (phi
), base
, step
;
777 tree type
= TREE_TYPE (name
);
779 if (!is_gimple_reg (name
))
782 if (!simple_iv (loop
, phi
, name
, &base
, &step
))
786 return build_int_cst (type
, 0);
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
794 abnormal_ssa_name_p (tree exp
)
799 if (TREE_CODE (exp
) != SSA_NAME
)
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
809 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
810 void *data ATTRIBUTE_UNUSED
)
812 if (TREE_CODE (base
) == ARRAY_REF
)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
816 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
820 return !abnormal_ssa_name_p (*index
);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
827 contains_abnormal_ssa_name_p (tree expr
)
829 enum tree_code code
= TREE_CODE (expr
);
830 enum tree_code_class
class = TREE_CODE_CLASS (code
);
832 if (code
== SSA_NAME
)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
835 if (code
== INTEGER_CST
836 || is_gimple_min_invariant (expr
))
839 if (code
== ADDR_EXPR
)
840 return !for_each_index (&TREE_OPERAND (expr
, 0),
841 idx_contains_abnormal_ssa_name_p
,
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
865 /* Finds basic ivs. */
868 find_bivs (struct ivopts_data
*data
)
870 tree phi
, step
, type
, base
;
872 struct loop
*loop
= data
->current_loop
;
874 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
879 step
= determine_biv_step (phi
);
883 if (cst_and_fits_in_hwi (step
)
884 && int_cst_value (step
) == 0)
887 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
888 if (contains_abnormal_ssa_name_p (base
))
891 type
= TREE_TYPE (PHI_RESULT (phi
));
892 base
= fold_convert (type
, base
);
893 step
= fold_convert (type
, step
);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step
))
900 set_iv (data
, PHI_RESULT (phi
), base
, step
);
907 /* Marks basic ivs. */
910 mark_bivs (struct ivopts_data
*data
)
913 struct iv
*iv
, *incr_iv
;
914 struct loop
*loop
= data
->current_loop
;
917 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
919 iv
= get_iv (data
, PHI_RESULT (phi
));
923 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
924 incr_iv
= get_iv (data
, var
);
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
930 if (incr_bb
->loop_father
!= data
->current_loop
931 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
935 incr_iv
->biv_p
= true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
943 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
,
944 tree
*base
, tree
*step
)
947 struct loop
*loop
= data
->current_loop
;
952 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
955 lhs
= TREE_OPERAND (stmt
, 0);
956 if (TREE_CODE (lhs
) != SSA_NAME
)
959 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), base
, step
))
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
965 && !cst_and_fits_in_hwi (*step
))
968 if (contains_abnormal_ssa_name_p (*base
))
974 /* Finds general ivs in statement STMT. */
977 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
981 if (!find_givs_in_stmt_scev (data
, stmt
, &base
, &step
))
984 set_iv (data
, TREE_OPERAND (stmt
, 0), base
, step
);
987 /* Finds general ivs in basic block BB. */
990 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
992 block_stmt_iterator bsi
;
994 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
995 find_givs_in_stmt (data
, bsi_stmt (bsi
));
998 /* Finds general ivs. */
1001 find_givs (struct ivopts_data
*data
)
1003 struct loop
*loop
= data
->current_loop
;
1004 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1007 for (i
= 0; i
< loop
->num_nodes
; i
++)
1008 find_givs_in_bb (data
, body
[i
]);
1012 /* Determine the number of iterations of the current loop. */
1015 determine_number_of_iterations (struct ivopts_data
*data
)
1017 struct loop
*loop
= data
->current_loop
;
1018 edge exit
= single_dom_exit (loop
);
1023 number_of_iterations_exit (loop
, exit
, &loop_data (loop
)->niter
);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1030 find_induction_variables (struct ivopts_data
*data
)
1033 struct loop
*loop
= data
->current_loop
;
1036 if (!find_bivs (data
))
1041 determine_number_of_iterations (data
);
1043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1045 if (loop_data (loop
)->niter
.niter
)
1047 fprintf (dump_file
, " number of iterations ");
1048 print_generic_expr (dump_file
, loop_data (loop
)->niter
.niter
,
1050 fprintf (dump_file
, "\n");
1052 fprintf (dump_file
, " may be zero if ");
1053 print_generic_expr (dump_file
, loop_data (loop
)->niter
.may_be_zero
,
1055 fprintf (dump_file
, "\n");
1057 fprintf (dump_file
, " bogus unless ");
1058 print_generic_expr (dump_file
, loop_data (loop
)->niter
.assumptions
,
1060 fprintf (dump_file
, "\n");
1061 fprintf (dump_file
, "\n");
1064 fprintf (dump_file
, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1068 if (ver_info (data
, i
)->iv
)
1069 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use
*
1079 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1080 tree stmt
, enum use_type use_type
)
1082 struct iv_use
*use
= xcalloc (1, sizeof (struct iv_use
));
1084 use
->id
= n_iv_uses (data
);
1085 use
->type
= use_type
;
1089 use
->related_cands
= BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1093 iv
->ssa_name
= NULL_TREE
;
1095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1096 dump_use (dump_file
, use
);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_uses
, use
);
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1108 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1111 struct version_info
*info
;
1113 if (TREE_CODE (op
) != SSA_NAME
1114 || !is_gimple_reg (op
))
1117 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1119 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1122 info
= name_info (data
, op
);
1124 info
->has_nonlin_use
|= nonlinear_use
;
1126 info
->inv_id
= ++data
->max_inv_id
;
1127 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1130 /* Checks whether the use OP is interesting and if so, records it
1133 static struct iv_use
*
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data
*data
, tree op
,
1142 if (TREE_CODE (op
) != SSA_NAME
)
1145 iv
= get_iv (data
, op
);
1149 if (iv
->have_use_for
)
1151 use
= iv_use (data
, iv
->use_id
);
1153 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
1154 || use
->type
== USE_OUTER
);
1156 if (type
== USE_NONLINEAR_EXPR
)
1157 use
->type
= USE_NONLINEAR_EXPR
;
1161 if (zero_p (iv
->step
))
1163 record_invariant (data
, op
, true);
1166 iv
->have_use_for
= true;
1168 civ
= xmalloc (sizeof (struct iv
));
1171 stmt
= SSA_NAME_DEF_STMT (op
);
1172 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1173 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1175 use
= record_use (data
, NULL
, civ
, stmt
, type
);
1176 iv
->use_id
= use
->id
;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use
*
1184 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1186 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_NONLINEAR_EXPR
);
1189 /* Records a definition of induction variable OP that is used outside of the
1192 static struct iv_use
*
1193 find_interesting_uses_outer (struct ivopts_data
*data
, tree op
)
1195 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_OUTER
);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1202 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1206 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1208 tree zero
= integer_zero_node
;
1210 const_iv
.step
= NULL_TREE
;
1212 if (integer_zerop (*cond_p
)
1213 || integer_nonzerop (*cond_p
))
1216 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1223 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1224 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1227 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1228 iv0
= get_iv (data
, *op0_p
);
1232 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1233 iv1
= get_iv (data
, *op1_p
);
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1244 find_interesting_uses_op (data
, *op0_p
);
1245 find_interesting_uses_op (data
, *op1_p
);
1249 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1251 /* If both are invariants, this is a work for unswitching. */
1255 civ
= xmalloc (sizeof (struct iv
));
1256 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1257 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1264 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1269 if (is_gimple_min_invariant (expr
))
1272 if (TREE_CODE (expr
) == SSA_NAME
)
1274 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1276 && flow_bb_inside_loop_p (loop
, def_bb
))
1285 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1286 for (i
= 0; i
< len
; i
++)
1287 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data
*ivopts_data
;
1305 idx_find_step (tree base
, tree
*idx
, void *data
)
1307 struct ifs_ivopts_data
*dta
= data
;
1309 tree step
, type
, iv_type
, iv_step
, lbound
, off
;
1310 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1312 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1316 /* If base is a component ref, require that the offset of the reference
1318 if (TREE_CODE (base
) == COMPONENT_REF
)
1320 off
= component_ref_field_offset (base
);
1321 return expr_invariant_in_loop_p (loop
, off
);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base
) == ARRAY_REF
)
1330 step
= array_ref_element_size (base
);
1331 lbound
= array_ref_low_bound (base
);
1333 if (!expr_invariant_in_loop_p (loop
, step
)
1334 || !expr_invariant_in_loop_p (loop
, lbound
))
1338 if (TREE_CODE (*idx
) != SSA_NAME
)
1341 iv
= get_iv (dta
->ivopts_data
, *idx
);
1350 iv_type
= TREE_TYPE (iv
->base
);
1351 type
= build_pointer_type (TREE_TYPE (base
));
1352 if (TREE_CODE (base
) == ARRAY_REF
)
1354 step
= array_ref_element_size (base
);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step
) != INTEGER_CST
)
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step
= build_int_cst (type
, 1);
1364 if (TYPE_PRECISION (iv_type
) < TYPE_PRECISION (type
))
1365 iv_step
= can_count_iv_in_wider_type (dta
->ivopts_data
->current_loop
,
1366 type
, iv
->base
, iv
->step
, dta
->stmt
);
1368 iv_step
= fold_convert (iv_type
, iv
->step
);
1372 /* The index might wrap. */
1376 step
= fold_binary_to_constant (MULT_EXPR
, type
, step
, iv_step
);
1379 *dta
->step_p
= step
;
1381 *dta
->step_p
= fold_binary_to_constant (PLUS_EXPR
, type
,
1382 *dta
->step_p
, step
);
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1391 idx_record_use (tree base
, tree
*idx
,
1394 find_interesting_uses_op (data
, *idx
);
1395 if (TREE_CODE (base
) == ARRAY_REF
)
1397 find_interesting_uses_op (data
, array_ref_element_size (base
));
1398 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1403 /* Returns true if memory reference REF may be unaligned. */
1406 may_be_unaligned_p (tree ref
)
1410 HOST_WIDE_INT bitsize
;
1411 HOST_WIDE_INT bitpos
;
1413 enum machine_mode mode
;
1414 int unsignedp
, volatilep
;
1415 unsigned base_align
;
1417 /* The test below is basically copy of what expr.c:normal_inner_ref
1418 does to check whether the object must be loaded by parts when
1419 STRICT_ALIGNMENT is true. */
1420 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1421 &unsignedp
, &volatilep
, true);
1422 base_type
= TREE_TYPE (base
);
1423 base_align
= TYPE_ALIGN (base_type
);
1426 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1427 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1428 || bitpos
% BITS_PER_UNIT
!= 0))
1434 /* Finds addresses in *OP_P inside STMT. */
1437 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1439 tree base
= unshare_expr (*op_p
), step
= NULL
;
1441 struct ifs_ivopts_data ifs_ivopts_data
;
1443 /* Ignore bitfields for now. Not really something terribly complicated
1445 if (TREE_CODE (base
) == COMPONENT_REF
1446 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1449 if (STRICT_ALIGNMENT
1450 && may_be_unaligned_p (base
))
1453 ifs_ivopts_data
.ivopts_data
= data
;
1454 ifs_ivopts_data
.stmt
= stmt
;
1455 ifs_ivopts_data
.step_p
= &step
;
1456 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1460 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1461 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1463 if (TREE_CODE (base
) == INDIRECT_REF
)
1464 base
= TREE_OPERAND (base
, 0);
1466 base
= build_addr (base
);
1468 civ
= alloc_iv (base
, step
);
1469 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1473 for_each_index (op_p
, idx_record_use
, data
);
1476 /* Finds and records invariants used in STMT. */
1479 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1481 use_optype uses
= NULL
;
1485 if (TREE_CODE (stmt
) == PHI_NODE
)
1486 n
= PHI_NUM_ARGS (stmt
);
1489 get_stmt_operands (stmt
);
1490 uses
= STMT_USE_OPS (stmt
);
1491 n
= NUM_USES (uses
);
1494 for (i
= 0; i
< n
; i
++)
1496 if (TREE_CODE (stmt
) == PHI_NODE
)
1497 op
= PHI_ARG_DEF (stmt
, i
);
1499 op
= USE_OP (uses
, i
);
1501 record_invariant (data
, op
, false);
1505 /* Finds interesting uses of induction variables in the statement STMT. */
1508 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1512 use_optype uses
= NULL
;
1515 find_invariants_stmt (data
, stmt
);
1517 if (TREE_CODE (stmt
) == COND_EXPR
)
1519 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1523 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1525 lhs
= TREE_OPERAND (stmt
, 0);
1526 rhs
= TREE_OPERAND (stmt
, 1);
1528 if (TREE_CODE (lhs
) == SSA_NAME
)
1530 /* If the statement defines an induction variable, the uses are not
1531 interesting by themselves. */
1533 iv
= get_iv (data
, lhs
);
1535 if (iv
&& !zero_p (iv
->step
))
1539 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1541 case tcc_comparison
:
1542 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1546 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1547 if (REFERENCE_CLASS_P (lhs
))
1548 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1554 if (REFERENCE_CLASS_P (lhs
)
1555 && is_gimple_val (rhs
))
1557 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1558 find_interesting_uses_op (data
, rhs
);
1562 /* TODO -- we should also handle address uses of type
1564 memory = call (whatever);
1571 if (TREE_CODE (stmt
) == PHI_NODE
1572 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1574 lhs
= PHI_RESULT (stmt
);
1575 iv
= get_iv (data
, lhs
);
1577 if (iv
&& !zero_p (iv
->step
))
1581 if (TREE_CODE (stmt
) == PHI_NODE
)
1582 n
= PHI_NUM_ARGS (stmt
);
1585 uses
= STMT_USE_OPS (stmt
);
1586 n
= NUM_USES (uses
);
1589 for (i
= 0; i
< n
; i
++)
1591 if (TREE_CODE (stmt
) == PHI_NODE
)
1592 op
= PHI_ARG_DEF (stmt
, i
);
1594 op
= USE_OP (uses
, i
);
1596 if (TREE_CODE (op
) != SSA_NAME
)
1599 iv
= get_iv (data
, op
);
1603 find_interesting_uses_op (data
, op
);
1607 /* Finds interesting uses of induction variables outside of loops
1608 on loop exit edge EXIT. */
1611 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1615 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1617 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1618 find_interesting_uses_outer (data
, def
);
1622 /* Finds uses of the induction variables that are interesting. */
1625 find_interesting_uses (struct ivopts_data
*data
)
1628 block_stmt_iterator bsi
;
1630 basic_block
*body
= get_loop_body (data
->current_loop
);
1632 struct version_info
*info
;
1635 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1636 fprintf (dump_file
, "Uses:\n\n");
1638 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1643 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1644 if (e
->dest
!= EXIT_BLOCK_PTR
1645 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1646 find_interesting_uses_outside (data
, e
);
1648 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1649 find_interesting_uses_stmt (data
, phi
);
1650 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1651 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1654 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1658 fprintf (dump_file
, "\n");
1660 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1662 info
= ver_info (data
, i
);
1665 fprintf (dump_file
, " ");
1666 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1667 fprintf (dump_file
, " is invariant (%d)%s\n",
1668 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1672 fprintf (dump_file
, "\n");
1678 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1679 position to POS. If USE is not NULL, the candidate is set as related to
1680 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1681 replacement of the final value of the iv by a direct computation. */
1683 static struct iv_cand
*
1684 add_candidate_1 (struct ivopts_data
*data
,
1685 tree base
, tree step
, bool important
, enum iv_position pos
,
1686 struct iv_use
*use
, tree incremented_at
)
1689 struct iv_cand
*cand
= NULL
;
1694 type
= TREE_TYPE (base
);
1695 if (!TYPE_UNSIGNED (type
))
1697 type
= unsigned_type_for (type
);
1698 base
= fold_convert (type
, base
);
1700 step
= fold_convert (type
, step
);
1704 for (i
= 0; i
< n_iv_cands (data
); i
++)
1706 cand
= iv_cand (data
, i
);
1708 if (cand
->pos
!= pos
)
1711 if (cand
->incremented_at
!= incremented_at
)
1725 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1728 if (zero_p (cand
->iv
->step
))
1735 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1740 if (i
== n_iv_cands (data
))
1742 cand
= xcalloc (1, sizeof (struct iv_cand
));
1748 cand
->iv
= alloc_iv (base
, step
);
1751 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1753 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
1754 cand
->var_after
= cand
->var_before
;
1756 cand
->important
= important
;
1757 cand
->incremented_at
= incremented_at
;
1758 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_candidates
, cand
);
1760 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1761 dump_cand (dump_file
, cand
);
1764 if (important
&& !cand
->important
)
1766 cand
->important
= true;
1767 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1768 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
1773 bitmap_set_bit (use
->related_cands
, i
);
1774 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1775 fprintf (dump_file
, "Candidate %d is related to use %d\n",
1782 /* Returns true if incrementing the induction variable at the end of the LOOP
1785 The purpose is to avoid splitting latch edge with a biv increment, thus
1786 creating a jump, possibly confusing other optimization passes and leaving
1787 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1788 is not available (so we do not have a better alternative), or if the latch
1789 edge is already nonempty. */
1792 allow_ip_end_pos_p (struct loop
*loop
)
1794 if (!ip_normal_pos (loop
))
1797 if (!empty_block_p (ip_end_pos (loop
)))
1803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1804 position to POS. If USE is not NULL, the candidate is set as related to
1805 it. The candidate computation is scheduled on all available positions. */
1808 add_candidate (struct ivopts_data
*data
,
1809 tree base
, tree step
, bool important
, struct iv_use
*use
)
1811 if (ip_normal_pos (data
->current_loop
))
1812 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
1813 if (ip_end_pos (data
->current_loop
)
1814 && allow_ip_end_pos_p (data
->current_loop
))
1815 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
1818 /* Adds standard iv candidates. */
1821 add_standard_iv_candidates (struct ivopts_data
*data
)
1823 /* Add 0 + 1 * iteration candidate. */
1824 add_candidate (data
,
1825 build_int_cst (unsigned_intSI_type_node
, 0),
1826 build_int_cst (unsigned_intSI_type_node
, 1),
1829 /* The same for a long type if it is still fast enough. */
1830 if (BITS_PER_WORD
> 32)
1831 add_candidate (data
,
1832 build_int_cst (unsigned_intDI_type_node
, 0),
1833 build_int_cst (unsigned_intDI_type_node
, 1),
1838 /* Adds candidates bases on the old induction variable IV. */
1841 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
1844 struct iv_cand
*cand
;
1846 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
1848 /* The same, but with initial value zero. */
1849 add_candidate (data
,
1850 build_int_cst (TREE_TYPE (iv
->base
), 0),
1851 iv
->step
, true, NULL
);
1853 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
1854 if (TREE_CODE (phi
) == PHI_NODE
)
1856 /* Additionally record the possibility of leaving the original iv
1858 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
1859 cand
= add_candidate_1 (data
,
1860 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
1861 SSA_NAME_DEF_STMT (def
));
1862 cand
->var_before
= iv
->ssa_name
;
1863 cand
->var_after
= def
;
1867 /* Adds candidates based on the old induction variables. */
1870 add_old_ivs_candidates (struct ivopts_data
*data
)
1876 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1878 iv
= ver_info (data
, i
)->iv
;
1879 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
1880 add_old_iv_candidates (data
, iv
);
1884 /* Adds candidates based on the value of the induction variable IV and USE. */
1887 add_iv_value_candidates (struct ivopts_data
*data
,
1888 struct iv
*iv
, struct iv_use
*use
)
1890 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
1892 /* The same, but with initial value zero. */
1893 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
1894 iv
->step
, false, use
);
1897 /* Adds candidates based on the address IV and USE. */
1900 add_address_candidates (struct ivopts_data
*data
,
1901 struct iv
*iv
, struct iv_use
*use
)
1903 tree base
, abase
, tmp
, *act
;
1905 /* First, the trivial choices. */
1906 add_iv_value_candidates (data
, iv
, use
);
1908 /* Second, try removing the COMPONENT_REFs. */
1909 if (TREE_CODE (iv
->base
) == ADDR_EXPR
)
1911 base
= TREE_OPERAND (iv
->base
, 0);
1912 while (TREE_CODE (base
) == COMPONENT_REF
1913 || (TREE_CODE (base
) == ARRAY_REF
1914 && TREE_CODE (TREE_OPERAND (base
, 1)) == INTEGER_CST
))
1915 base
= TREE_OPERAND (base
, 0);
1917 if (base
!= TREE_OPERAND (iv
->base
, 0))
1919 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1920 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1922 if (TREE_CODE (base
) == INDIRECT_REF
)
1923 base
= TREE_OPERAND (base
, 0);
1925 base
= build_addr (base
);
1926 add_candidate (data
, base
, iv
->step
, false, use
);
1930 /* Third, try removing the constant offset. */
1932 while (TREE_CODE (abase
) == PLUS_EXPR
1933 && TREE_CODE (TREE_OPERAND (abase
, 1)) != INTEGER_CST
)
1934 abase
= TREE_OPERAND (abase
, 0);
1935 /* We found the offset, so make the copy of the non-shared part and
1937 if (TREE_CODE (abase
) == PLUS_EXPR
)
1942 for (tmp
= iv
->base
; tmp
!= abase
; tmp
= TREE_OPERAND (tmp
, 0))
1944 *act
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
),
1945 NULL_TREE
, TREE_OPERAND (tmp
, 1));
1946 act
= &TREE_OPERAND (*act
, 0);
1948 *act
= TREE_OPERAND (tmp
, 0);
1950 add_candidate (data
, base
, iv
->step
, false, use
);
1954 /* Possibly adds pseudocandidate for replacing the final value of USE by
1955 a direct computation. */
1958 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
1960 struct tree_niter_desc
*niter
;
1961 struct loop
*loop
= data
->current_loop
;
1963 /* We must know where we exit the loop and how many times does it roll. */
1964 if (!single_dom_exit (loop
))
1967 niter
= &loop_data (loop
)->niter
;
1969 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
1970 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
1973 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
1976 /* Adds candidates based on the uses. */
1979 add_derived_ivs_candidates (struct ivopts_data
*data
)
1983 for (i
= 0; i
< n_iv_uses (data
); i
++)
1985 struct iv_use
*use
= iv_use (data
, i
);
1992 case USE_NONLINEAR_EXPR
:
1994 /* Just add the ivs based on the value of the iv used here. */
1995 add_iv_value_candidates (data
, use
->iv
, use
);
1999 add_iv_value_candidates (data
, use
->iv
, use
);
2001 /* Additionally, add the pseudocandidate for the possibility to
2002 replace the final value by a direct computation. */
2003 add_iv_outer_candidates (data
, use
);
2007 add_address_candidates (data
, use
->iv
, use
);
2016 /* Record important candidates and add them to related_cands bitmaps
2020 record_important_candidates (struct ivopts_data
*data
)
2025 for (i
= 0; i
< n_iv_cands (data
); i
++)
2027 struct iv_cand
*cand
= iv_cand (data
, i
);
2029 if (cand
->important
)
2030 bitmap_set_bit (data
->important_candidates
, i
);
2033 data
->consider_all_candidates
= (n_iv_cands (data
)
2034 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2036 if (data
->consider_all_candidates
)
2038 /* We will not need "related_cands" bitmaps in this case,
2039 so release them to decrease peak memory consumption. */
2040 for (i
= 0; i
< n_iv_uses (data
); i
++)
2042 use
= iv_use (data
, i
);
2043 BITMAP_XFREE (use
->related_cands
);
2048 /* Add important candidates to the related_cands bitmaps. */
2049 for (i
= 0; i
< n_iv_uses (data
); i
++)
2050 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2051 data
->important_candidates
);
2055 /* Finds the candidates for the induction variables. */
2058 find_iv_candidates (struct ivopts_data
*data
)
2060 /* Add commonly used ivs. */
2061 add_standard_iv_candidates (data
);
2063 /* Add old induction variables. */
2064 add_old_ivs_candidates (data
);
2066 /* Add induction variables derived from uses. */
2067 add_derived_ivs_candidates (data
);
2069 /* Record the important candidates. */
2070 record_important_candidates (data
);
2073 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2074 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2075 we allocate a simple list to every use. */
2078 alloc_use_cost_map (struct ivopts_data
*data
)
2080 unsigned i
, size
, s
, j
;
2082 for (i
= 0; i
< n_iv_uses (data
); i
++)
2084 struct iv_use
*use
= iv_use (data
, i
);
2087 if (data
->consider_all_candidates
)
2088 size
= n_iv_cands (data
);
2092 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2097 /* Round up to the power of two, so that moduling by it is fast. */
2098 for (size
= 1; size
< s
; size
<<= 1)
2102 use
->n_map_members
= size
;
2103 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2107 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2108 on invariants DEPENDS_ON. */
2111 set_use_iv_cost (struct ivopts_data
*data
,
2112 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2119 BITMAP_XFREE (depends_on
);
2123 if (data
->consider_all_candidates
)
2125 use
->cost_map
[cand
->id
].cand
= cand
;
2126 use
->cost_map
[cand
->id
].cost
= cost
;
2127 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2131 /* n_map_members is a power of two, so this computes modulo. */
2132 s
= cand
->id
& (use
->n_map_members
- 1);
2133 for (i
= s
; i
< use
->n_map_members
; i
++)
2134 if (!use
->cost_map
[i
].cand
)
2136 for (i
= 0; i
< s
; i
++)
2137 if (!use
->cost_map
[i
].cand
)
2143 use
->cost_map
[i
].cand
= cand
;
2144 use
->cost_map
[i
].cost
= cost
;
2145 use
->cost_map
[i
].depends_on
= depends_on
;
2148 /* Gets cost of (USE, CANDIDATE) pair. */
2150 static struct cost_pair
*
2151 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2152 struct iv_cand
*cand
)
2155 struct cost_pair
*ret
;
2160 if (data
->consider_all_candidates
)
2162 ret
= use
->cost_map
+ cand
->id
;
2169 /* n_map_members is a power of two, so this computes modulo. */
2170 s
= cand
->id
& (use
->n_map_members
- 1);
2171 for (i
= s
; i
< use
->n_map_members
; i
++)
2172 if (use
->cost_map
[i
].cand
== cand
)
2173 return use
->cost_map
+ i
;
2175 for (i
= 0; i
< s
; i
++)
2176 if (use
->cost_map
[i
].cand
== cand
)
2177 return use
->cost_map
+ i
;
2182 /* Returns estimate on cost of computing SEQ. */
2190 for (; seq
; seq
= NEXT_INSN (seq
))
2192 set
= single_set (seq
);
2194 cost
+= rtx_cost (set
, SET
);
2202 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2204 produce_memory_decl_rtl (tree obj
, int *regno
)
2209 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2211 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2212 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2215 x
= gen_raw_REG (Pmode
, (*regno
)++);
2217 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2220 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2221 walk_tree. DATA contains the actual fake register number. */
2224 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2226 tree obj
= NULL_TREE
;
2230 switch (TREE_CODE (*expr_p
))
2233 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2234 handled_component_p (*expr_p
);
2235 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2239 x
= produce_memory_decl_rtl (obj
, regno
);
2244 obj
= SSA_NAME_VAR (*expr_p
);
2245 if (!DECL_RTL_SET_P (obj
))
2246 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2255 if (DECL_RTL_SET_P (obj
))
2258 if (DECL_MODE (obj
) == BLKmode
)
2259 x
= produce_memory_decl_rtl (obj
, regno
);
2261 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2271 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset
, obj
);
2272 SET_DECL_RTL (obj
, x
);
2278 /* Determines cost of the computation of EXPR. */
2281 computation_cost (tree expr
)
2284 tree type
= TREE_TYPE (expr
);
2288 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2290 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2294 cost
= seq_cost (seq
);
2295 if (GET_CODE (rslt
) == MEM
)
2296 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2301 /* Returns variable containing the value of candidate CAND at statement AT. */
2304 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2306 if (stmt_after_increment (loop
, cand
, stmt
))
2307 return cand
->var_after
;
2309 return cand
->var_before
;
2312 /* Determines the expression by that USE is expressed from induction variable
2313 CAND at statement AT in LOOP. */
2316 get_computation_at (struct loop
*loop
,
2317 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
2319 tree ubase
= use
->iv
->base
;
2320 tree ustep
= use
->iv
->step
;
2321 tree cbase
= cand
->iv
->base
;
2322 tree cstep
= cand
->iv
->step
;
2323 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2327 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2328 HOST_WIDE_INT ratioi
;
2330 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2332 /* We do not have a precision to express the values of use. */
2336 expr
= var_at_stmt (loop
, cand
, at
);
2338 if (TREE_TYPE (expr
) != ctype
)
2340 /* This may happen with the original ivs. */
2341 expr
= fold_convert (ctype
, expr
);
2344 if (TYPE_UNSIGNED (utype
))
2348 uutype
= unsigned_type_for (utype
);
2349 ubase
= fold_convert (uutype
, ubase
);
2350 ustep
= fold_convert (uutype
, ustep
);
2353 if (uutype
!= ctype
)
2355 expr
= fold_convert (uutype
, expr
);
2356 cbase
= fold_convert (uutype
, cbase
);
2357 cstep
= fold_convert (uutype
, cstep
);
2360 if (!cst_and_fits_in_hwi (cstep
)
2361 || !cst_and_fits_in_hwi (ustep
))
2364 ustepi
= int_cst_value (ustep
);
2365 cstepi
= int_cst_value (cstep
);
2367 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2369 /* TODO maybe consider case when ustep divides cstep and the ratio is
2370 a power of 2 (so that the division is fast to execute)? We would
2371 need to be much more careful with overflows etc. then. */
2375 /* We may need to shift the value if we are after the increment. */
2376 if (stmt_after_increment (loop
, cand
, at
))
2377 cbase
= fold (build2 (PLUS_EXPR
, uutype
, cbase
, cstep
));
2379 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2380 or |ratio| == 1, it is better to handle this like
2382 ubase - ratio * cbase + ratio * var. */
2386 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, cbase
));
2387 expr
= fold (build2 (PLUS_EXPR
, uutype
, expr
, delta
));
2389 else if (ratioi
== -1)
2391 delta
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, cbase
));
2392 expr
= fold (build2 (MINUS_EXPR
, uutype
, delta
, expr
));
2394 else if (TREE_CODE (cbase
) == INTEGER_CST
)
2396 ratio
= build_int_cst_type (uutype
, ratioi
);
2397 delta
= fold (build2 (MULT_EXPR
, uutype
, ratio
, cbase
));
2398 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, delta
));
2399 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2400 expr
= fold (build2 (PLUS_EXPR
, uutype
, delta
, expr
));
2404 expr
= fold (build2 (MINUS_EXPR
, uutype
, expr
, cbase
));
2405 ratio
= build_int_cst_type (uutype
, ratioi
);
2406 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2407 expr
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, expr
));
2410 return fold_convert (utype
, expr
);
2413 /* Determines the expression by that USE is expressed from induction variable
2417 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2419 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2422 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2425 strip_offset (tree
*expr
, unsigned HOST_WIDE_INT
*offset
)
2428 enum tree_code code
;
2432 if (cst_and_fits_in_hwi (*expr
))
2434 *offset
+= int_cst_value (*expr
);
2435 *expr
= integer_zero_node
;
2439 code
= TREE_CODE (*expr
);
2441 if (code
!= PLUS_EXPR
&& code
!= MINUS_EXPR
)
2444 op0
= TREE_OPERAND (*expr
, 0);
2445 op1
= TREE_OPERAND (*expr
, 1);
2447 if (cst_and_fits_in_hwi (op1
))
2449 if (code
== PLUS_EXPR
)
2450 *offset
+= int_cst_value (op1
);
2452 *offset
-= int_cst_value (op1
);
2458 if (code
!= PLUS_EXPR
)
2461 if (!cst_and_fits_in_hwi (op0
))
2464 *offset
+= int_cst_value (op0
);
2469 /* Returns cost of addition in MODE. */
2472 add_cost (enum machine_mode mode
)
2474 static unsigned costs
[NUM_MACHINE_MODES
];
2482 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2483 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
),
2484 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
+ 1)),
2489 cost
= seq_cost (seq
);
2495 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2496 fprintf (dump_file
, "Addition in %s costs %d\n",
2497 GET_MODE_NAME (mode
), cost
);
2501 /* Entry in a hashtable of already known costs for multiplication. */
2504 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2505 enum machine_mode mode
; /* In mode. */
2506 unsigned cost
; /* The cost. */
2509 /* Counts hash value for the ENTRY. */
2512 mbc_entry_hash (const void *entry
)
2514 const struct mbc_entry
*e
= entry
;
2516 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2519 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2522 mbc_entry_eq (const void *entry1
, const void *entry2
)
2524 const struct mbc_entry
*e1
= entry1
;
2525 const struct mbc_entry
*e2
= entry2
;
2527 return (e1
->mode
== e2
->mode
2528 && e1
->cst
== e2
->cst
);
2531 /* Returns cost of multiplication by constant CST in MODE. */
2534 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
2536 static htab_t costs
;
2537 struct mbc_entry
**cached
, act
;
2542 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2546 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2548 return (*cached
)->cost
;
2550 *cached
= xmalloc (sizeof (struct mbc_entry
));
2551 (*cached
)->mode
= mode
;
2552 (*cached
)->cst
= cst
;
2555 expand_mult (mode
, gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
), GEN_INT (cst
),
2560 cost
= seq_cost (seq
);
2562 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2563 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2564 (int) cst
, GET_MODE_NAME (mode
), cost
);
2566 (*cached
)->cost
= cost
;
2571 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2572 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2573 variable is omitted. The created memory accesses MODE.
2575 TODO -- there must be some better way. This all is quite crude. */
2578 get_address_cost (bool symbol_present
, bool var_present
,
2579 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
2581 #define MAX_RATIO 128
2582 static sbitmap valid_mult
;
2583 static HOST_WIDE_INT rat
, off
;
2584 static HOST_WIDE_INT min_offset
, max_offset
;
2585 static unsigned costs
[2][2][2][2];
2586 unsigned cost
, acost
;
2587 rtx seq
, addr
, base
;
2588 bool offset_p
, ratio_p
;
2590 HOST_WIDE_INT s_offset
;
2591 unsigned HOST_WIDE_INT mask
;
2598 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2600 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
2601 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2603 XEXP (addr
, 1) = GEN_INT (i
);
2604 if (!memory_address_p (Pmode
, addr
))
2607 max_offset
= i
>> 1;
2610 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2612 XEXP (addr
, 1) = GEN_INT (-i
);
2613 if (!memory_address_p (Pmode
, addr
))
2616 min_offset
= -(i
>> 1);
2618 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2620 fprintf (dump_file
, "get_address_cost:\n");
2621 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
2622 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
2625 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
2626 sbitmap_zero (valid_mult
);
2628 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2629 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2631 XEXP (addr
, 1) = GEN_INT (i
);
2632 if (memory_address_p (Pmode
, addr
))
2634 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
2639 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2641 fprintf (dump_file
, " allowed multipliers:");
2642 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2643 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
2644 fprintf (dump_file
, " %d", (int) i
);
2645 fprintf (dump_file
, "\n");
2646 fprintf (dump_file
, "\n");
2650 bits
= GET_MODE_BITSIZE (Pmode
);
2651 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
2653 if ((offset
>> (bits
- 1) & 1))
2658 offset_p
= (s_offset
!= 0
2659 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
2660 ratio_p
= (ratio
!= 1
2661 && -MAX_RATIO
<= ratio
&& ratio
<= MAX_RATIO
2662 && TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
));
2664 if (ratio
!= 1 && !ratio_p
)
2665 cost
+= multiply_by_cost (ratio
, Pmode
);
2667 if (s_offset
&& !offset_p
&& !symbol_present
)
2669 cost
+= add_cost (Pmode
);
2673 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
2678 addr
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2679 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
+ 1);
2681 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, GEN_INT (rat
));
2684 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
2688 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
2690 base
= gen_rtx_fmt_e (CONST
, Pmode
,
2691 gen_rtx_fmt_ee (PLUS
, Pmode
,
2696 base
= GEN_INT (off
);
2701 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
2704 addr
= memory_address (Pmode
, addr
);
2708 acost
= seq_cost (seq
);
2709 acost
+= address_cost (addr
, Pmode
);
2713 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
2716 return cost
+ acost
;
2719 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2720 the bitmap to that we should store it. */
2722 static struct ivopts_data
*fd_ivopts_data
;
2724 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2726 bitmap
*depends_on
= data
;
2727 struct version_info
*info
;
2729 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2731 info
= name_info (fd_ivopts_data
, *expr_p
);
2733 if (!info
->inv_id
|| info
->has_nonlin_use
)
2737 *depends_on
= BITMAP_XMALLOC ();
2738 bitmap_set_bit (*depends_on
, info
->inv_id
);
2743 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2744 invariants the computation depends on. */
2747 force_var_cost (struct ivopts_data
*data
,
2748 tree expr
, bitmap
*depends_on
)
2750 static bool costs_initialized
= false;
2751 static unsigned integer_cost
;
2752 static unsigned symbol_cost
;
2753 static unsigned address_cost
;
2755 unsigned cost0
, cost1
, cost
;
2756 enum machine_mode mode
;
2758 if (!costs_initialized
)
2760 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
2761 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
2762 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
2764 tree type
= build_pointer_type (integer_type_node
);
2766 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
2769 SET_DECL_RTL (var
, x
);
2770 TREE_STATIC (var
) = 1;
2771 addr
= build1 (ADDR_EXPR
, type
, var
);
2772 symbol_cost
= computation_cost (addr
) + 1;
2775 = computation_cost (build2 (PLUS_EXPR
, type
,
2777 build_int_cst_type (type
, 2000))) + 1;
2778 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2780 fprintf (dump_file
, "force_var_cost:\n");
2781 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
2782 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
2783 fprintf (dump_file
, " address %d\n", (int) address_cost
);
2784 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
2785 fprintf (dump_file
, "\n");
2788 costs_initialized
= true;
2793 fd_ivopts_data
= data
;
2794 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
2797 if (SSA_VAR_P (expr
))
2800 if (TREE_INVARIANT (expr
))
2802 if (TREE_CODE (expr
) == INTEGER_CST
)
2803 return integer_cost
;
2805 if (TREE_CODE (expr
) == ADDR_EXPR
)
2807 tree obj
= TREE_OPERAND (expr
, 0);
2809 if (TREE_CODE (obj
) == VAR_DECL
2810 || TREE_CODE (obj
) == PARM_DECL
2811 || TREE_CODE (obj
) == RESULT_DECL
)
2815 return address_cost
;
2818 switch (TREE_CODE (expr
))
2823 op0
= TREE_OPERAND (expr
, 0);
2824 op1
= TREE_OPERAND (expr
, 1);
2826 if (is_gimple_val (op0
))
2829 cost0
= force_var_cost (data
, op0
, NULL
);
2831 if (is_gimple_val (op1
))
2834 cost1
= force_var_cost (data
, op1
, NULL
);
2839 /* Just an arbitrary value, FIXME. */
2840 return target_spill_cost
;
2843 mode
= TYPE_MODE (TREE_TYPE (expr
));
2844 switch (TREE_CODE (expr
))
2848 cost
= add_cost (mode
);
2852 if (cst_and_fits_in_hwi (op0
))
2853 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
2854 else if (cst_and_fits_in_hwi (op1
))
2855 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
2857 return target_spill_cost
;
2867 /* Bound the cost by target_spill_cost. The parts of complicated
2868 computations often are either loop invariant or at least can
2869 be shared between several iv uses, so letting this grow without
2870 limits would not give reasonable results. */
2871 return cost
< target_spill_cost
? cost
: target_spill_cost
;
2874 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2875 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2876 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2877 invariants the computation depends on. */
2880 split_address_cost (struct ivopts_data
*data
,
2881 tree addr
, bool *symbol_present
, bool *var_present
,
2882 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2885 HOST_WIDE_INT bitsize
;
2886 HOST_WIDE_INT bitpos
;
2888 enum machine_mode mode
;
2889 int unsignedp
, volatilep
;
2891 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
2892 &unsignedp
, &volatilep
, false);
2895 || bitpos
% BITS_PER_UNIT
!= 0
2896 || TREE_CODE (core
) != VAR_DECL
)
2898 *symbol_present
= false;
2899 *var_present
= true;
2900 fd_ivopts_data
= data
;
2901 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
2902 return target_spill_cost
;
2905 *offset
+= bitpos
/ BITS_PER_UNIT
;
2906 if (TREE_STATIC (core
)
2907 || DECL_EXTERNAL (core
))
2909 *symbol_present
= true;
2910 *var_present
= false;
2914 *symbol_present
= false;
2915 *var_present
= true;
2919 /* Estimates cost of expressing difference of addresses E1 - E2 as
2920 var + symbol + offset. The value of offset is added to OFFSET,
2921 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2922 part is missing. DEPENDS_ON is a set of the invariants the computation
2926 ptr_difference_cost (struct ivopts_data
*data
,
2927 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2928 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2930 HOST_WIDE_INT diff
= 0;
2933 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
2935 if (ptr_difference_const (e1
, e2
, &diff
))
2938 *symbol_present
= false;
2939 *var_present
= false;
2943 if (e2
== integer_zero_node
)
2944 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
2945 symbol_present
, var_present
, offset
, depends_on
);
2947 *symbol_present
= false;
2948 *var_present
= true;
2950 cost
= force_var_cost (data
, e1
, depends_on
);
2951 cost
+= force_var_cost (data
, e2
, depends_on
);
2952 cost
+= add_cost (Pmode
);
2957 /* Estimates cost of expressing difference E1 - E2 as
2958 var + symbol + offset. The value of offset is added to OFFSET,
2959 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2960 part is missing. DEPENDS_ON is a set of the invariants the computation
2964 difference_cost (struct ivopts_data
*data
,
2965 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2966 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2969 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
2971 strip_offset (&e1
, offset
);
2973 strip_offset (&e2
, offset
);
2976 if (TREE_CODE (e1
) == ADDR_EXPR
)
2977 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
2979 *symbol_present
= false;
2981 if (operand_equal_p (e1
, e2
, 0))
2983 *var_present
= false;
2986 *var_present
= true;
2988 return force_var_cost (data
, e1
, depends_on
);
2992 cost
= force_var_cost (data
, e2
, depends_on
);
2993 cost
+= multiply_by_cost (-1, mode
);
2998 cost
= force_var_cost (data
, e1
, depends_on
);
2999 cost
+= force_var_cost (data
, e2
, depends_on
);
3000 cost
+= add_cost (mode
);
3005 /* Determines the cost of the computation by that USE is expressed
3006 from induction variable CAND. If ADDRESS_P is true, we just need
3007 to create an address from it, otherwise we want to get it into
3008 register. A set of invariants we depend on is stored in
3009 DEPENDS_ON. AT is the statement at that the value is computed. */
3012 get_computation_cost_at (struct ivopts_data
*data
,
3013 struct iv_use
*use
, struct iv_cand
*cand
,
3014 bool address_p
, bitmap
*depends_on
, tree at
)
3016 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3018 tree utype
= TREE_TYPE (ubase
), ctype
;
3019 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3020 HOST_WIDE_INT ratio
, aratio
;
3021 bool var_present
, symbol_present
;
3022 unsigned cost
= 0, n_sums
;
3026 /* Only consider real candidates. */
3030 cbase
= cand
->iv
->base
;
3031 cstep
= cand
->iv
->step
;
3032 ctype
= TREE_TYPE (cbase
);
3034 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3036 /* We do not have a precision to express the values of use. */
3042 /* Do not try to express address of an object with computation based
3043 on address of a different object. This may cause problems in rtl
3044 level alias analysis (that does not expect this to be happening,
3045 as this is illegal in C), and would be unlikely to be useful
3047 if (use
->iv
->base_object
3048 && cand
->iv
->base_object
3049 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3053 if (!cst_and_fits_in_hwi (ustep
)
3054 || !cst_and_fits_in_hwi (cstep
))
3057 if (TREE_CODE (ubase
) == INTEGER_CST
3058 && !cst_and_fits_in_hwi (ubase
))
3061 if (TREE_CODE (cbase
) == INTEGER_CST
3062 && !cst_and_fits_in_hwi (cbase
))
3065 ustepi
= int_cst_value (ustep
);
3066 cstepi
= int_cst_value (cstep
);
3068 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3070 /* TODO -- add direct handling of this case. */
3074 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3077 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3078 or ratio == 1, it is better to handle this like
3080 ubase - ratio * cbase + ratio * var
3082 (also holds in the case ratio == -1, TODO. */
3084 if (TREE_CODE (cbase
) == INTEGER_CST
)
3086 offset
= - ratio
* int_cst_value (cbase
);
3087 cost
+= difference_cost (data
,
3088 ubase
, integer_zero_node
,
3089 &symbol_present
, &var_present
, &offset
,
3092 else if (ratio
== 1)
3094 cost
+= difference_cost (data
,
3096 &symbol_present
, &var_present
, &offset
,
3101 cost
+= force_var_cost (data
, cbase
, depends_on
);
3102 cost
+= add_cost (TYPE_MODE (ctype
));
3103 cost
+= difference_cost (data
,
3104 ubase
, integer_zero_node
,
3105 &symbol_present
, &var_present
, &offset
,
3109 /* If we are after the increment, the value of the candidate is higher by
3111 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3112 offset
-= ratio
* cstepi
;
3114 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3115 (symbol/var/const parts may be omitted). If we are looking for an address,
3116 find the cost of addressing this. */
3118 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3120 /* Otherwise estimate the costs for computing the expression. */
3121 aratio
= ratio
> 0 ? ratio
: -ratio
;
3122 if (!symbol_present
&& !var_present
&& !offset
)
3125 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3131 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3135 /* Symbol + offset should be compile-time computable. */
3136 && (symbol_present
|| offset
))
3139 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3143 /* Just get the expression, expand it and measure the cost. */
3144 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3150 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3152 return computation_cost (comp
);
3156 /* Determines the cost of the computation by that USE is expressed
3157 from induction variable CAND. If ADDRESS_P is true, we just need
3158 to create an address from it, otherwise we want to get it into
3159 register. A set of invariants we depend on is stored in
3163 get_computation_cost (struct ivopts_data
*data
,
3164 struct iv_use
*use
, struct iv_cand
*cand
,
3165 bool address_p
, bitmap
*depends_on
)
3167 return get_computation_cost_at (data
,
3168 use
, cand
, address_p
, depends_on
, use
->stmt
);
3171 /* Determines cost of basing replacement of USE on CAND in a generic
3175 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3176 struct iv_use
*use
, struct iv_cand
*cand
)
3181 /* The simple case first -- if we need to express value of the preserved
3182 original biv, the cost is 0. This also prevents us from counting the
3183 cost of increment twice -- once at this use and once in the cost of
3185 if (cand
->pos
== IP_ORIGINAL
3186 && cand
->incremented_at
== use
->stmt
)
3188 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3192 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3193 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3195 return cost
!= INFTY
;
3198 /* Determines cost of basing replacement of USE on CAND in an address. */
3201 determine_use_iv_cost_address (struct ivopts_data
*data
,
3202 struct iv_use
*use
, struct iv_cand
*cand
)
3205 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3207 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3209 return cost
!= INFTY
;
3212 /* Computes value of induction variable IV in iteration NITER. */
3215 iv_value (struct iv
*iv
, tree niter
)
3218 tree type
= TREE_TYPE (iv
->base
);
3220 niter
= fold_convert (type
, niter
);
3221 val
= fold (build2 (MULT_EXPR
, type
, iv
->step
, niter
));
3223 return fold (build2 (PLUS_EXPR
, type
, iv
->base
, val
));
3226 /* Computes value of candidate CAND at position AT in iteration NITER. */
3229 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3231 tree val
= iv_value (cand
->iv
, niter
);
3232 tree type
= TREE_TYPE (cand
->iv
->base
);
3234 if (stmt_after_increment (loop
, cand
, at
))
3235 val
= fold (build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
));
3240 /* Check whether it is possible to express the condition in USE by comparison
3241 of candidate CAND. If so, store the comparison code to COMPARE and the
3242 value compared with to BOUND. */
3245 may_eliminate_iv (struct loop
*loop
,
3246 struct iv_use
*use
, struct iv_cand
*cand
,
3247 enum tree_code
*compare
, tree
*bound
)
3251 struct tree_niter_desc niter
, new_niter
;
3252 tree wider_type
, type
, base
;
3254 /* For now works only for exits that dominate the loop latch. TODO -- extend
3255 for other conditions inside loop body. */
3256 ex_bb
= bb_for_stmt (use
->stmt
);
3257 if (use
->stmt
!= last_stmt (ex_bb
)
3258 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3260 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3263 exit
= EDGE_SUCC (ex_bb
, 0);
3264 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3265 exit
= EDGE_SUCC (ex_bb
, 1);
3266 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3269 niter
.niter
= NULL_TREE
;
3270 number_of_iterations_exit (loop
, exit
, &niter
);
3272 || !integer_nonzerop (niter
.assumptions
)
3273 || !integer_zerop (niter
.may_be_zero
))
3276 if (exit
->flags
& EDGE_TRUE_VALUE
)
3281 *bound
= cand_value_at (loop
, cand
, use
->stmt
, niter
.niter
);
3283 /* Let us check there is not some problem with overflows, by checking that
3284 the number of iterations is unchanged. */
3285 base
= cand
->iv
->base
;
3286 type
= TREE_TYPE (base
);
3287 if (stmt_after_increment (loop
, cand
, use
->stmt
))
3288 base
= fold (build2 (PLUS_EXPR
, type
, base
, cand
->iv
->step
));
3290 new_niter
.niter
= NULL_TREE
;
3291 number_of_iterations_cond (TREE_TYPE (cand
->iv
->base
), base
,
3292 cand
->iv
->step
, NE_EXPR
, *bound
, NULL_TREE
,
3294 if (!new_niter
.niter
3295 || !integer_nonzerop (new_niter
.assumptions
)
3296 || !integer_zerop (new_niter
.may_be_zero
))
3299 wider_type
= TREE_TYPE (new_niter
.niter
);
3300 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (TREE_TYPE (niter
.niter
)))
3301 wider_type
= TREE_TYPE (niter
.niter
);
3302 if (!operand_equal_p (fold_convert (wider_type
, niter
.niter
),
3303 fold_convert (wider_type
, new_niter
.niter
), 0))
3309 /* Determines cost of basing replacement of USE on CAND in a condition. */
3312 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3313 struct iv_use
*use
, struct iv_cand
*cand
)
3316 enum tree_code compare
;
3318 /* Only consider real candidates. */
3321 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3325 if (may_eliminate_iv (data
->current_loop
, use
, cand
, &compare
, &bound
))
3327 bitmap depends_on
= NULL
;
3328 unsigned cost
= force_var_cost (data
, bound
, &depends_on
);
3330 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3331 return cost
!= INFTY
;
3334 /* The induction variable elimination failed; just express the original
3335 giv. If it is compared with an invariant, note that we cannot get
3337 if (TREE_CODE (*use
->op_p
) == SSA_NAME
)
3338 record_invariant (data
, *use
->op_p
, true);
3341 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 0), true);
3342 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 1), true);
3345 return determine_use_iv_cost_generic (data
, use
, cand
);
3348 /* Checks whether it is possible to replace the final value of USE by
3349 a direct computation. If so, the formula is stored to *VALUE. */
3352 may_replace_final_value (struct loop
*loop
, struct iv_use
*use
, tree
*value
)
3355 struct tree_niter_desc
*niter
;
3357 exit
= single_dom_exit (loop
);
3361 gcc_assert (dominated_by_p (CDI_DOMINATORS
, exit
->src
,
3362 bb_for_stmt (use
->stmt
)));
3364 niter
= &loop_data (loop
)->niter
;
3366 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
3367 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
3370 *value
= iv_value (use
->iv
, niter
->niter
);
3375 /* Determines cost of replacing final value of USE using CAND. */
3378 determine_use_iv_cost_outer (struct ivopts_data
*data
,
3379 struct iv_use
*use
, struct iv_cand
*cand
)
3385 struct loop
*loop
= data
->current_loop
;
3387 /* The simple case first -- if we need to express value of the preserved
3388 original biv, the cost is 0. This also prevents us from counting the
3389 cost of increment twice -- once at this use and once in the cost of
3391 if (cand
->pos
== IP_ORIGINAL
3392 && cand
->incremented_at
== use
->stmt
)
3394 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3400 if (!may_replace_final_value (loop
, use
, &value
))
3402 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3407 cost
= force_var_cost (data
, value
, &depends_on
);
3409 cost
/= AVG_LOOP_NITER (loop
);
3411 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3412 return cost
!= INFTY
;
3415 exit
= single_dom_exit (loop
);
3418 /* If there is just a single exit, we may use value of the candidate
3419 after we take it to determine the value of use. */
3420 cost
= get_computation_cost_at (data
, use
, cand
, false, &depends_on
,
3421 last_stmt (exit
->src
));
3423 cost
/= AVG_LOOP_NITER (loop
);
3427 /* Otherwise we just need to compute the iv. */
3428 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3431 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3433 return cost
!= INFTY
;
3436 /* Determines cost of basing replacement of USE on CAND. Returns false
3437 if USE cannot be based on CAND. */
3440 determine_use_iv_cost (struct ivopts_data
*data
,
3441 struct iv_use
*use
, struct iv_cand
*cand
)
3445 case USE_NONLINEAR_EXPR
:
3446 return determine_use_iv_cost_generic (data
, use
, cand
);
3449 return determine_use_iv_cost_outer (data
, use
, cand
);
3452 return determine_use_iv_cost_address (data
, use
, cand
);
3455 return determine_use_iv_cost_condition (data
, use
, cand
);
3462 /* Determines costs of basing the use of the iv on an iv candidate. */
3465 determine_use_iv_costs (struct ivopts_data
*data
)
3469 struct iv_cand
*cand
;
3470 bitmap to_clear
= BITMAP_XMALLOC ();
3472 alloc_use_cost_map (data
);
3474 for (i
= 0; i
< n_iv_uses (data
); i
++)
3476 use
= iv_use (data
, i
);
3478 if (data
->consider_all_candidates
)
3480 for (j
= 0; j
< n_iv_cands (data
); j
++)
3482 cand
= iv_cand (data
, j
);
3483 determine_use_iv_cost (data
, use
, cand
);
3490 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
3492 cand
= iv_cand (data
, j
);
3493 if (!determine_use_iv_cost (data
, use
, cand
))
3494 bitmap_set_bit (to_clear
, j
);
3497 /* Remove the candidates for that the cost is infinite from
3498 the list of related candidates. */
3499 bitmap_and_compl_into (use
->related_cands
, to_clear
);
3500 bitmap_clear (to_clear
);
3504 BITMAP_XFREE (to_clear
);
3506 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3508 fprintf (dump_file
, "Use-candidate costs:\n");
3510 for (i
= 0; i
< n_iv_uses (data
); i
++)
3512 use
= iv_use (data
, i
);
3514 fprintf (dump_file
, "Use %d:\n", i
);
3515 fprintf (dump_file
, " cand\tcost\tdepends on\n");
3516 for (j
= 0; j
< use
->n_map_members
; j
++)
3518 if (!use
->cost_map
[j
].cand
3519 || use
->cost_map
[j
].cost
== INFTY
)
3522 fprintf (dump_file
, " %d\t%d\t",
3523 use
->cost_map
[j
].cand
->id
,
3524 use
->cost_map
[j
].cost
);
3525 if (use
->cost_map
[j
].depends_on
)
3526 bitmap_print (dump_file
,
3527 use
->cost_map
[j
].depends_on
, "","");
3528 fprintf (dump_file
, "\n");
3531 fprintf (dump_file
, "\n");
3533 fprintf (dump_file
, "\n");
3537 /* Determines cost of the candidate CAND. */
3540 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
3542 unsigned cost_base
, cost_step
;
3551 /* There are two costs associated with the candidate -- its increment
3552 and its initialization. The second is almost negligible for any loop
3553 that rolls enough, so we take it just very little into account. */
3555 base
= cand
->iv
->base
;
3556 cost_base
= force_var_cost (data
, base
, NULL
);
3557 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
3559 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
3561 /* Prefer the original iv unless we may gain something by replacing it. */
3562 if (cand
->pos
== IP_ORIGINAL
)
3565 /* Prefer not to insert statements into latch unless there are some
3566 already (so that we do not create unnecessary jumps). */
3567 if (cand
->pos
== IP_END
3568 && empty_block_p (ip_end_pos (data
->current_loop
)))
3572 /* Determines costs of computation of the candidates. */
3575 determine_iv_costs (struct ivopts_data
*data
)
3579 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3581 fprintf (dump_file
, "Candidate costs:\n");
3582 fprintf (dump_file
, " cand\tcost\n");
3585 for (i
= 0; i
< n_iv_cands (data
); i
++)
3587 struct iv_cand
*cand
= iv_cand (data
, i
);
3589 determine_iv_cost (data
, cand
);
3591 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3592 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
3595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3596 fprintf (dump_file
, "\n");
3599 /* Calculates cost for having SIZE induction variables. */
3602 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
3604 return global_cost_for_size (size
,
3605 loop_data (data
->current_loop
)->regs_used
,
3609 /* For each size of the induction variable set determine the penalty. */
3612 determine_set_costs (struct ivopts_data
*data
)
3616 struct loop
*loop
= data
->current_loop
;
3619 /* We use the following model (definitely improvable, especially the
3620 cost function -- TODO):
3622 We estimate the number of registers available (using MD data), name it A.
3624 We estimate the number of registers used by the loop, name it U. This
3625 number is obtained as the number of loop phi nodes (not counting virtual
3626 registers and bivs) + the number of variables from outside of the loop.
3628 We set a reserve R (free regs that are used for temporary computations,
3629 etc.). For now the reserve is a constant 3.
3631 Let I be the number of induction variables.
3633 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3634 make a lot of ivs without a reason).
3635 -- if A - R < U + I <= A, the cost is I * PRES_COST
3636 -- if U + I > A, the cost is I * PRES_COST and
3637 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3639 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3641 fprintf (dump_file
, "Global costs:\n");
3642 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
3643 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
3644 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
3645 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
3649 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
3651 op
= PHI_RESULT (phi
);
3653 if (!is_gimple_reg (op
))
3656 if (get_iv (data
, op
))
3662 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
3664 struct version_info
*info
= ver_info (data
, j
);
3666 if (info
->inv_id
&& info
->has_nonlin_use
)
3670 loop_data (loop
)->regs_used
= n
;
3671 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3672 fprintf (dump_file
, " regs_used %d\n", n
);
3674 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3676 fprintf (dump_file
, " cost for size:\n");
3677 fprintf (dump_file
, " ivs\tcost\n");
3678 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
3679 fprintf (dump_file
, " %d\t%d\n", j
,
3680 ivopts_global_cost_for_size (data
, j
));
3681 fprintf (dump_file
, "\n");
3685 /* Returns true if A is a cheaper cost pair than B. */
3688 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
3696 if (a
->cost
< b
->cost
)
3699 if (a
->cost
> b
->cost
)
3702 /* In case the costs are the same, prefer the cheaper candidate. */
3703 if (a
->cand
->cost
< b
->cand
->cost
)
3709 /* Computes the cost field of IVS structure. */
3712 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
3716 cost
+= ivs
->cand_use_cost
;
3717 cost
+= ivs
->cand_cost
;
3718 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
3723 /* Set USE not to be expressed by any candidate in IVS. */
3726 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3729 unsigned uid
= use
->id
, cid
, iid
;
3731 struct cost_pair
*cp
;
3734 cp
= ivs
->cand_for_use
[uid
];
3740 ivs
->cand_for_use
[uid
] = NULL
;
3741 ivs
->n_cand_uses
[cid
]--;
3743 if (ivs
->n_cand_uses
[cid
] == 0)
3745 bitmap_clear_bit (ivs
->cands
, cid
);
3746 /* Do not count the pseudocandidates. */
3750 ivs
->cand_cost
-= cp
->cand
->cost
;
3753 ivs
->cand_use_cost
-= cp
->cost
;
3755 deps
= cp
->depends_on
;
3759 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3761 ivs
->n_invariant_uses
[iid
]--;
3762 if (ivs
->n_invariant_uses
[iid
] == 0)
3767 iv_ca_recount_cost (data
, ivs
);
3770 /* Set cost pair for USE in set IVS to CP. */
3773 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3774 struct iv_use
*use
, struct cost_pair
*cp
)
3776 unsigned uid
= use
->id
, cid
, iid
;
3780 if (ivs
->cand_for_use
[uid
] == cp
)
3783 if (ivs
->cand_for_use
[uid
])
3784 iv_ca_set_no_cp (data
, ivs
, use
);
3791 ivs
->cand_for_use
[uid
] = cp
;
3792 ivs
->n_cand_uses
[cid
]++;
3793 if (ivs
->n_cand_uses
[cid
] == 1)
3795 bitmap_set_bit (ivs
->cands
, cid
);
3796 /* Do not count the pseudocandidates. */
3800 ivs
->cand_cost
+= cp
->cand
->cost
;
3803 ivs
->cand_use_cost
+= cp
->cost
;
3805 deps
= cp
->depends_on
;
3809 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3811 ivs
->n_invariant_uses
[iid
]++;
3812 if (ivs
->n_invariant_uses
[iid
] == 1)
3817 iv_ca_recount_cost (data
, ivs
);
3821 /* Extend set IVS by expressing USE by some of the candidates in it
3825 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3828 struct cost_pair
*best_cp
= NULL
, *cp
;
3832 gcc_assert (ivs
->upto
>= use
->id
);
3834 if (ivs
->upto
== use
->id
)
3840 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
3842 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
3844 if (cheaper_cost_pair (cp
, best_cp
))
3848 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
3851 /* Get cost for assignment IVS. */
3854 iv_ca_cost (struct iv_ca
*ivs
)
3856 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
3859 /* Returns true if all dependences of CP are among invariants in IVS. */
3862 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
3867 if (!cp
->depends_on
)
3870 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
3872 if (ivs
->n_invariant_uses
[i
] == 0)
3879 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3880 it before NEXT_CHANGE. */
3882 static struct iv_ca_delta
*
3883 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
3884 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
3886 struct iv_ca_delta
*change
= xmalloc (sizeof (struct iv_ca_delta
));
3889 change
->old_cp
= old_cp
;
3890 change
->new_cp
= new_cp
;
3891 change
->next_change
= next_change
;
3896 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3899 static struct iv_ca_delta
*
3900 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
3902 struct iv_ca_delta
*last
;
3910 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
3912 last
->next_change
= l2
;
3917 /* Returns candidate by that USE is expressed in IVS. */
3919 static struct cost_pair
*
3920 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
3922 return ivs
->cand_for_use
[use
->id
];
3925 /* Reverse the list of changes DELTA, forming the inverse to it. */
3927 static struct iv_ca_delta
*
3928 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
3930 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
3931 struct cost_pair
*tmp
;
3933 for (act
= delta
; act
; act
= next
)
3935 next
= act
->next_change
;
3936 act
->next_change
= prev
;
3940 act
->old_cp
= act
->new_cp
;
3947 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3948 reverted instead. */
3951 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3952 struct iv_ca_delta
*delta
, bool forward
)
3954 struct cost_pair
*from
, *to
;
3955 struct iv_ca_delta
*act
;
3958 delta
= iv_ca_delta_reverse (delta
);
3960 for (act
= delta
; act
; act
= act
->next_change
)
3964 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
3965 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
3969 iv_ca_delta_reverse (delta
);
3972 /* Returns true if CAND is used in IVS. */
3975 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
3977 return ivs
->n_cand_uses
[cand
->id
] > 0;
3980 /* Returns number of induction variable candidates in the set IVS. */
3983 iv_ca_n_cands (struct iv_ca
*ivs
)
3985 return ivs
->n_cands
;
3988 /* Free the list of changes DELTA. */
3991 iv_ca_delta_free (struct iv_ca_delta
**delta
)
3993 struct iv_ca_delta
*act
, *next
;
3995 for (act
= *delta
; act
; act
= next
)
3997 next
= act
->next_change
;
4004 /* Allocates new iv candidates assignment. */
4006 static struct iv_ca
*
4007 iv_ca_new (struct ivopts_data
*data
)
4009 struct iv_ca
*nw
= xmalloc (sizeof (struct iv_ca
));
4013 nw
->cand_for_use
= xcalloc (n_iv_uses (data
), sizeof (struct cost_pair
*));
4014 nw
->n_cand_uses
= xcalloc (n_iv_cands (data
), sizeof (unsigned));
4015 nw
->cands
= BITMAP_XMALLOC ();
4018 nw
->cand_use_cost
= 0;
4020 nw
->n_invariant_uses
= xcalloc (data
->max_inv_id
+ 1, sizeof (unsigned));
4026 /* Free memory occupied by the set IVS. */
4029 iv_ca_free (struct iv_ca
**ivs
)
4031 free ((*ivs
)->cand_for_use
);
4032 free ((*ivs
)->n_cand_uses
);
4033 BITMAP_XFREE ((*ivs
)->cands
);
4034 free ((*ivs
)->n_invariant_uses
);
4039 /* Dumps IVS to FILE. */
4042 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4044 const char *pref
= " invariants ";
4047 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4048 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4050 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4051 if (ivs
->n_invariant_uses
[i
])
4053 fprintf (file
, "%s%d", pref
, i
);
4056 fprintf (file
, "\n");
4059 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4060 new set, and store differences in DELTA. Number of induction variables
4061 in the new set is stored to N_IVS. */
4064 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4065 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4070 struct cost_pair
*old_cp
, *new_cp
;
4073 for (i
= 0; i
< ivs
->upto
; i
++)
4075 use
= iv_use (data
, i
);
4076 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4079 && old_cp
->cand
== cand
)
4082 new_cp
= get_use_iv_cost (data
, use
, cand
);
4086 if (!iv_ca_has_deps (ivs
, new_cp
))
4089 if (!cheaper_cost_pair (new_cp
, old_cp
))
4092 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4095 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4096 cost
= iv_ca_cost (ivs
);
4098 *n_ivs
= iv_ca_n_cands (ivs
);
4099 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4104 /* Try narrowing set IVS by removing CAND. Return the cost of
4105 the new set and store the differences in DELTA. */
4108 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4109 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4113 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4115 struct iv_cand
*cnd
;
4119 for (i
= 0; i
< n_iv_uses (data
); i
++)
4121 use
= iv_use (data
, i
);
4123 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4124 if (old_cp
->cand
!= cand
)
4129 if (data
->consider_all_candidates
)
4131 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4136 cnd
= iv_cand (data
, ci
);
4138 cp
= get_use_iv_cost (data
, use
, cnd
);
4141 if (!iv_ca_has_deps (ivs
, cp
))
4144 if (!cheaper_cost_pair (cp
, new_cp
))
4152 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4157 cnd
= iv_cand (data
, ci
);
4159 cp
= get_use_iv_cost (data
, use
, cnd
);
4162 if (!iv_ca_has_deps (ivs
, cp
))
4165 if (!cheaper_cost_pair (cp
, new_cp
))
4174 iv_ca_delta_free (delta
);
4178 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4181 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4182 cost
= iv_ca_cost (ivs
);
4183 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4188 /* Try optimizing the set of candidates IVS by removing candidates different
4189 from to EXCEPT_CAND from it. Return cost of the new set, and store
4190 differences in DELTA. */
4193 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4194 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4197 struct iv_ca_delta
*act_delta
, *best_delta
;
4198 unsigned i
, best_cost
, acost
;
4199 struct iv_cand
*cand
;
4202 best_cost
= iv_ca_cost (ivs
);
4204 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4206 cand
= iv_cand (data
, i
);
4208 if (cand
== except_cand
)
4211 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4213 if (acost
< best_cost
)
4216 iv_ca_delta_free (&best_delta
);
4217 best_delta
= act_delta
;
4220 iv_ca_delta_free (&act_delta
);
4229 /* Recurse to possibly remove other unnecessary ivs. */
4230 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4231 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4232 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4233 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4237 /* Tries to extend the sets IVS in the best possible way in order
4238 to express the USE. */
4241 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4244 unsigned best_cost
, act_cost
;
4247 struct iv_cand
*cand
;
4248 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4249 struct cost_pair
*cp
;
4251 iv_ca_add_use (data
, ivs
, use
);
4252 best_cost
= iv_ca_cost (ivs
);
4254 cp
= iv_ca_cand_for_use (ivs
, use
);
4257 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4258 iv_ca_set_no_cp (data
, ivs
, use
);
4261 /* First try important candidates. Only if it fails, try the specific ones.
4262 Rationale -- in loops with many variables the best choice often is to use
4263 just one generic biv. If we added here many ivs specific to the uses,
4264 the optimization algorithm later would be likely to get stuck in a local
4265 minimum, thus causing us to create too many ivs. The approach from
4266 few ivs to more seems more likely to be successful -- starting from few
4267 ivs, replacing an expensive use by a specific iv should always be a
4269 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4271 cand
= iv_cand (data
, i
);
4273 if (iv_ca_cand_used_p (ivs
, cand
))
4276 cp
= get_use_iv_cost (data
, use
, cand
);
4280 iv_ca_set_cp (data
, ivs
, use
, cp
);
4281 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4282 iv_ca_set_no_cp (data
, ivs
, use
);
4283 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4285 if (act_cost
< best_cost
)
4287 best_cost
= act_cost
;
4289 iv_ca_delta_free (&best_delta
);
4290 best_delta
= act_delta
;
4293 iv_ca_delta_free (&act_delta
);
4296 if (best_cost
== INFTY
)
4298 for (i
= 0; i
< use
->n_map_members
; i
++)
4300 cp
= use
->cost_map
+ i
;
4305 /* Already tried this. */
4306 if (cand
->important
)
4309 if (iv_ca_cand_used_p (ivs
, cand
))
4313 iv_ca_set_cp (data
, ivs
, use
, cp
);
4314 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4315 iv_ca_set_no_cp (data
, ivs
, use
);
4316 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4319 if (act_cost
< best_cost
)
4321 best_cost
= act_cost
;
4324 iv_ca_delta_free (&best_delta
);
4325 best_delta
= act_delta
;
4328 iv_ca_delta_free (&act_delta
);
4332 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4333 iv_ca_delta_free (&best_delta
);
4335 return (best_cost
!= INFTY
);
4338 /* Finds an initial assignment of candidates to uses. */
4340 static struct iv_ca
*
4341 get_initial_solution (struct ivopts_data
*data
)
4343 struct iv_ca
*ivs
= iv_ca_new (data
);
4346 for (i
= 0; i
< n_iv_uses (data
); i
++)
4347 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4356 /* Tries to improve set of induction variables IVS. */
4359 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4361 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4362 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4363 struct iv_cand
*cand
;
4365 /* Try extending the set of induction variables by one. */
4366 for (i
= 0; i
< n_iv_cands (data
); i
++)
4368 cand
= iv_cand (data
, i
);
4370 if (iv_ca_cand_used_p (ivs
, cand
))
4373 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4377 /* If we successfully added the candidate and the set is small enough,
4378 try optimizing it by removing other candidates. */
4379 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
4381 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
4382 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
4383 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
4384 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
4387 if (acost
< best_cost
)
4390 iv_ca_delta_free (&best_delta
);
4391 best_delta
= act_delta
;
4394 iv_ca_delta_free (&act_delta
);
4399 /* Try removing the candidates from the set instead. */
4400 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
4402 /* Nothing more we can do. */
4407 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4408 gcc_assert (best_cost
== iv_ca_cost (ivs
));
4409 iv_ca_delta_free (&best_delta
);
4413 /* Attempts to find the optimal set of induction variables. We do simple
4414 greedy heuristic -- we try to replace at most one candidate in the selected
4415 solution and remove the unused ivs while this improves the cost. */
4417 static struct iv_ca
*
4418 find_optimal_iv_set (struct ivopts_data
*data
)
4424 /* Get the initial solution. */
4425 set
= get_initial_solution (data
);
4428 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4429 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
4433 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4435 fprintf (dump_file
, "Initial set of candidates:\n");
4436 iv_ca_dump (data
, dump_file
, set
);
4439 while (try_improve_iv_set (data
, set
))
4441 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4443 fprintf (dump_file
, "Improved to:\n");
4444 iv_ca_dump (data
, dump_file
, set
);
4448 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4449 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
4451 for (i
= 0; i
< n_iv_uses (data
); i
++)
4453 use
= iv_use (data
, i
);
4454 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
4460 /* Creates a new induction variable corresponding to CAND. */
4463 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
4465 block_stmt_iterator incr_pos
;
4475 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
4479 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
4484 /* Mark that the iv is preserved. */
4485 name_info (data
, cand
->var_before
)->preserve_biv
= true;
4486 name_info (data
, cand
->var_after
)->preserve_biv
= true;
4488 /* Rewrite the increment so that it uses var_before directly. */
4489 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
4494 gimple_add_tmp_var (cand
->var_before
);
4495 add_referenced_tmp_var (cand
->var_before
);
4497 base
= unshare_expr (cand
->iv
->base
);
4499 create_iv (base
, cand
->iv
->step
, cand
->var_before
, data
->current_loop
,
4500 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
4503 /* Creates new induction variables described in SET. */
4506 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
4509 struct iv_cand
*cand
;
4512 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
4514 cand
= iv_cand (data
, i
);
4515 create_new_iv (data
, cand
);
4519 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4520 is true, remove also the ssa name defined by the statement. */
4523 remove_statement (tree stmt
, bool including_defined_name
)
4525 if (TREE_CODE (stmt
) == PHI_NODE
)
4527 if (!including_defined_name
)
4529 /* Prevent the ssa name defined by the statement from being removed. */
4530 SET_PHI_RESULT (stmt
, NULL
);
4532 remove_phi_node (stmt
, NULL_TREE
, bb_for_stmt (stmt
));
4536 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
4542 /* Rewrites USE (definition of iv used in a nonlinear expression)
4543 using candidate CAND. */
4546 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
4547 struct iv_use
*use
, struct iv_cand
*cand
)
4549 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4551 tree op
, stmts
, tgt
, ass
;
4552 block_stmt_iterator bsi
, pbsi
;
4554 switch (TREE_CODE (use
->stmt
))
4557 tgt
= PHI_RESULT (use
->stmt
);
4559 /* If we should keep the biv, do not replace it. */
4560 if (name_info (data
, tgt
)->preserve_biv
)
4563 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
4564 while (!bsi_end_p (pbsi
)
4565 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
4573 tgt
= TREE_OPERAND (use
->stmt
, 0);
4574 bsi
= bsi_for_stmt (use
->stmt
);
4581 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
4583 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
4586 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4587 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
4588 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
4589 remove_statement (use
->stmt
, false);
4590 SSA_NAME_DEF_STMT (tgt
) = ass
;
4595 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4596 TREE_OPERAND (use
->stmt
, 1) = op
;
4600 /* Replaces ssa name in index IDX by its basic variable. Callback for
4604 idx_remove_ssa_names (tree base
, tree
*idx
,
4605 void *data ATTRIBUTE_UNUSED
)
4609 if (TREE_CODE (*idx
) == SSA_NAME
)
4610 *idx
= SSA_NAME_VAR (*idx
);
4612 if (TREE_CODE (base
) == ARRAY_REF
)
4614 op
= &TREE_OPERAND (base
, 2);
4616 && TREE_CODE (*op
) == SSA_NAME
)
4617 *op
= SSA_NAME_VAR (*op
);
4618 op
= &TREE_OPERAND (base
, 3);
4620 && TREE_CODE (*op
) == SSA_NAME
)
4621 *op
= SSA_NAME_VAR (*op
);
4627 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4630 unshare_and_remove_ssa_names (tree ref
)
4632 ref
= unshare_expr (ref
);
4633 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
4638 /* Rewrites base of memory access OP with expression WITH in statement
4639 pointed to by BSI. */
4642 rewrite_address_base (block_stmt_iterator
*bsi
, tree
*op
, tree with
)
4644 tree bvar
, var
, new_var
, new_name
, copy
, name
;
4647 var
= bvar
= get_base_address (*op
);
4649 if (!var
|| TREE_CODE (with
) != SSA_NAME
)
4652 gcc_assert (TREE_CODE (var
) != ALIGN_INDIRECT_REF
);
4653 gcc_assert (TREE_CODE (var
) != MISALIGNED_INDIRECT_REF
);
4654 if (TREE_CODE (var
) == INDIRECT_REF
)
4655 var
= TREE_OPERAND (var
, 0);
4656 if (TREE_CODE (var
) == SSA_NAME
)
4659 var
= SSA_NAME_VAR (var
);
4661 else if (DECL_P (var
))
4666 if (var_ann (var
)->type_mem_tag
)
4667 var
= var_ann (var
)->type_mem_tag
;
4669 /* We need to add a memory tag for the variable. But we do not want
4670 to add it to the temporary used for the computations, since this leads
4671 to problems in redundancy elimination when there are common parts
4672 in two computations referring to the different arrays. So we copy
4673 the variable to a new temporary. */
4674 copy
= build2 (MODIFY_EXPR
, void_type_node
, NULL_TREE
, with
);
4676 new_name
= duplicate_ssa_name (name
, copy
);
4679 new_var
= create_tmp_var (TREE_TYPE (with
), "ruatmp");
4680 add_referenced_tmp_var (new_var
);
4681 var_ann (new_var
)->type_mem_tag
= var
;
4682 new_name
= make_ssa_name (new_var
, copy
);
4684 TREE_OPERAND (copy
, 0) = new_name
;
4685 bsi_insert_before (bsi
, copy
, BSI_SAME_STMT
);
4691 gcc_assert (TREE_CODE (*op
) != ALIGN_INDIRECT_REF
);
4692 gcc_assert (TREE_CODE (*op
) != MISALIGNED_INDIRECT_REF
);
4694 if (TREE_CODE (*op
) == INDIRECT_REF
)
4695 orig
= REF_ORIGINAL (*op
);
4697 orig
= unshare_and_remove_ssa_names (*op
);
4699 *op
= build1 (INDIRECT_REF
, TREE_TYPE (*op
), with
);
4701 /* Record the original reference, for purposes of alias analysis. */
4702 REF_ORIGINAL (*op
) = orig
;
4705 /* Rewrites USE (address that is an iv) using candidate CAND. */
4708 rewrite_use_address (struct ivopts_data
*data
,
4709 struct iv_use
*use
, struct iv_cand
*cand
)
4711 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4713 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4715 tree op
= force_gimple_operand (comp
, &stmts
, true, NULL_TREE
);
4718 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4720 rewrite_address_base (&bsi
, use
->op_p
, op
);
4723 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4727 rewrite_use_compare (struct ivopts_data
*data
,
4728 struct iv_use
*use
, struct iv_cand
*cand
)
4731 tree
*op_p
, cond
, op
, stmts
, bound
;
4732 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4733 enum tree_code compare
;
4735 if (may_eliminate_iv (data
->current_loop
,
4736 use
, cand
, &compare
, &bound
))
4738 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
4742 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4744 *use
->op_p
= build2 (compare
, boolean_type_node
,
4745 var_at_stmt (data
->current_loop
,
4746 cand
, use
->stmt
), op
);
4747 modify_stmt (use
->stmt
);
4751 /* The induction variable elimination failed; just express the original
4753 comp
= unshare_expr (get_computation (data
->current_loop
, use
, cand
));
4756 op_p
= &TREE_OPERAND (cond
, 0);
4757 if (TREE_CODE (*op_p
) != SSA_NAME
4758 || zero_p (get_iv (data
, *op_p
)->step
))
4759 op_p
= &TREE_OPERAND (cond
, 1);
4761 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
4763 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4768 /* Ensure that operand *OP_P may be used at the end of EXIT without
4769 violating loop closed ssa form. */
4772 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
4775 struct loop
*def_loop
;
4778 use
= USE_FROM_PTR (op_p
);
4779 if (TREE_CODE (use
) != SSA_NAME
)
4782 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
4786 def_loop
= def_bb
->loop_father
;
4787 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
4790 /* Try finding a phi node that copies the value out of the loop. */
4791 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4792 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
4797 /* Create such a phi node. */
4798 tree new_name
= duplicate_ssa_name (use
, NULL
);
4800 phi
= create_phi_node (new_name
, exit
->dest
);
4801 SSA_NAME_DEF_STMT (new_name
) = phi
;
4802 add_phi_arg (phi
, use
, exit
);
4805 SET_USE (op_p
, PHI_RESULT (phi
));
4808 /* Ensure that operands of STMT may be used at the end of EXIT without
4809 violating loop closed ssa form. */
4812 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
4816 v_may_def_optype v_may_defs
;
4819 get_stmt_operands (stmt
);
4821 uses
= STMT_USE_OPS (stmt
);
4822 for (i
= 0; i
< NUM_USES (uses
); i
++)
4823 protect_loop_closed_ssa_form_use (exit
, USE_OP_PTR (uses
, i
));
4825 vuses
= STMT_VUSE_OPS (stmt
);
4826 for (i
= 0; i
< NUM_VUSES (vuses
); i
++)
4827 protect_loop_closed_ssa_form_use (exit
, VUSE_OP_PTR (vuses
, i
));
4829 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
4830 for (i
= 0; i
< NUM_V_MAY_DEFS (v_may_defs
); i
++)
4831 protect_loop_closed_ssa_form_use (exit
, V_MAY_DEF_OP_PTR (v_may_defs
, i
));
4834 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4835 so that they are emitted on the correct place, and so that the loop closed
4836 ssa form is preserved. */
4839 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
4841 tree_stmt_iterator tsi
;
4842 block_stmt_iterator bsi
;
4843 tree phi
, stmt
, def
, next
;
4845 if (EDGE_COUNT (exit
->dest
->preds
) > 1)
4846 split_loop_exit_edge (exit
);
4848 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
4850 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
4851 protect_loop_closed_ssa_form (exit
, tsi_stmt (tsi
));
4854 protect_loop_closed_ssa_form (exit
, stmts
);
4856 /* Ensure there is label in exit->dest, so that we can
4858 tree_block_label (exit
->dest
);
4859 bsi
= bsi_after_labels (exit
->dest
);
4860 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4865 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
4867 next
= PHI_CHAIN (phi
);
4869 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
4871 def
= PHI_RESULT (phi
);
4872 remove_statement (phi
, false);
4873 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
4875 SSA_NAME_DEF_STMT (def
) = stmt
;
4876 bsi_insert_after (&bsi
, stmt
, BSI_CONTINUE_LINKING
);
4881 /* Rewrites the final value of USE (that is only needed outside of the loop)
4882 using candidate CAND. */
4885 rewrite_use_outer (struct ivopts_data
*data
,
4886 struct iv_use
*use
, struct iv_cand
*cand
)
4889 tree value
, op
, stmts
, tgt
;
4892 switch (TREE_CODE (use
->stmt
))
4895 tgt
= PHI_RESULT (use
->stmt
);
4898 tgt
= TREE_OPERAND (use
->stmt
, 0);
4904 exit
= single_dom_exit (data
->current_loop
);
4910 bool ok
= may_replace_final_value (data
->current_loop
, use
, &value
);
4914 value
= get_computation_at (data
->current_loop
,
4915 use
, cand
, last_stmt (exit
->src
));
4917 value
= unshare_expr (value
);
4918 op
= force_gimple_operand (value
, &stmts
, true, SSA_NAME_VAR (tgt
));
4920 /* If we will preserve the iv anyway and we would need to perform
4921 some computation to replace the final value, do nothing. */
4922 if (stmts
&& name_info (data
, tgt
)->preserve_biv
)
4925 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4927 use_operand_p use_p
= PHI_ARG_DEF_PTR_FROM_EDGE (phi
, exit
);
4929 if (USE_FROM_PTR (use_p
) == tgt
)
4930 SET_USE (use_p
, op
);
4934 compute_phi_arg_on_exit (exit
, stmts
, op
);
4936 /* Enable removal of the statement. We cannot remove it directly,
4937 since we may still need the aliasing information attached to the
4938 ssa name defined by it. */
4939 name_info (data
, tgt
)->iv
->have_use_for
= false;
4943 /* If the variable is going to be preserved anyway, there is nothing to
4945 if (name_info (data
, tgt
)->preserve_biv
)
4948 /* Otherwise we just need to compute the iv. */
4949 rewrite_use_nonlinear_expr (data
, use
, cand
);
4952 /* Rewrites USE using candidate CAND. */
4955 rewrite_use (struct ivopts_data
*data
,
4956 struct iv_use
*use
, struct iv_cand
*cand
)
4960 case USE_NONLINEAR_EXPR
:
4961 rewrite_use_nonlinear_expr (data
, use
, cand
);
4965 rewrite_use_outer (data
, use
, cand
);
4969 rewrite_use_address (data
, use
, cand
);
4973 rewrite_use_compare (data
, use
, cand
);
4979 modify_stmt (use
->stmt
);
4982 /* Rewrite the uses using the selected induction variables. */
4985 rewrite_uses (struct ivopts_data
*data
)
4988 struct iv_cand
*cand
;
4991 for (i
= 0; i
< n_iv_uses (data
); i
++)
4993 use
= iv_use (data
, i
);
4994 cand
= use
->selected
;
4997 rewrite_use (data
, use
, cand
);
5001 /* Removes the ivs that are not used after rewriting. */
5004 remove_unused_ivs (struct ivopts_data
*data
)
5009 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5011 struct version_info
*info
;
5013 info
= ver_info (data
, j
);
5015 && !zero_p (info
->iv
->step
)
5017 && !info
->iv
->have_use_for
5018 && !info
->preserve_biv
)
5019 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5023 /* Frees data allocated by the optimization of a single loop. */
5026 free_loop_data (struct ivopts_data
*data
)
5031 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5033 struct version_info
*info
;
5035 info
= ver_info (data
, i
);
5039 info
->has_nonlin_use
= false;
5040 info
->preserve_biv
= false;
5043 bitmap_clear (data
->relevant
);
5044 bitmap_clear (data
->important_candidates
);
5046 for (i
= 0; i
< n_iv_uses (data
); i
++)
5048 struct iv_use
*use
= iv_use (data
, i
);
5051 BITMAP_XFREE (use
->related_cands
);
5052 for (j
= 0; j
< use
->n_map_members
; j
++)
5053 if (use
->cost_map
[j
].depends_on
)
5054 BITMAP_XFREE (use
->cost_map
[j
].depends_on
);
5055 free (use
->cost_map
);
5058 VARRAY_POP_ALL (data
->iv_uses
);
5060 for (i
= 0; i
< n_iv_cands (data
); i
++)
5062 struct iv_cand
*cand
= iv_cand (data
, i
);
5068 VARRAY_POP_ALL (data
->iv_candidates
);
5070 if (data
->version_info_size
< num_ssa_names
)
5072 data
->version_info_size
= 2 * num_ssa_names
;
5073 free (data
->version_info
);
5074 data
->version_info
= xcalloc (data
->version_info_size
,
5075 sizeof (struct version_info
));
5078 data
->max_inv_id
= 0;
5080 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (decl_rtl_to_reset
); i
++)
5082 tree obj
= VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset
, i
);
5084 SET_DECL_RTL (obj
, NULL_RTX
);
5086 VARRAY_POP_ALL (decl_rtl_to_reset
);
5089 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5093 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
5097 for (i
= 1; i
< loops
->num
; i
++)
5098 if (loops
->parray
[i
])
5100 free (loops
->parray
[i
]->aux
);
5101 loops
->parray
[i
]->aux
= NULL
;
5104 free_loop_data (data
);
5105 free (data
->version_info
);
5106 BITMAP_XFREE (data
->relevant
);
5107 BITMAP_XFREE (data
->important_candidates
);
5109 VARRAY_FREE (decl_rtl_to_reset
);
5110 VARRAY_FREE (data
->iv_uses
);
5111 VARRAY_FREE (data
->iv_candidates
);
5114 /* Optimizes the LOOP. Returns true if anything changed. */
5117 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5119 bool changed
= false;
5120 struct iv_ca
*iv_ca
;
5123 data
->current_loop
= loop
;
5125 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5127 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5129 exit
= single_dom_exit (loop
);
5132 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5133 exit
->src
->index
, exit
->dest
->index
);
5134 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5135 fprintf (dump_file
, "\n");
5138 fprintf (dump_file
, "\n");
5141 /* For each ssa name determines whether it behaves as an induction variable
5143 if (!find_induction_variables (data
))
5146 /* Finds interesting uses (item 1). */
5147 find_interesting_uses (data
);
5148 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5151 /* Finds candidates for the induction variables (item 2). */
5152 find_iv_candidates (data
);
5154 /* Calculates the costs (item 3, part 1). */
5155 determine_use_iv_costs (data
);
5156 determine_iv_costs (data
);
5157 determine_set_costs (data
);
5159 /* Find the optimal set of induction variables (item 3, part 2). */
5160 iv_ca
= find_optimal_iv_set (data
);
5165 /* Create the new induction variables (item 4, part 1). */
5166 create_new_ivs (data
, iv_ca
);
5167 iv_ca_free (&iv_ca
);
5169 /* Rewrite the uses (item 4, part 2). */
5170 rewrite_uses (data
);
5172 /* Remove the ivs that are unused after rewriting. */
5173 remove_unused_ivs (data
);
5175 loop_commit_inserts ();
5177 /* We have changed the structure of induction variables; it might happen
5178 that definitions in the scev database refer to some of them that were
5183 free_loop_data (data
);
5188 /* Main entry point. Optimizes induction variables in LOOPS. */
5191 tree_ssa_iv_optimize (struct loops
*loops
)
5194 struct ivopts_data data
;
5196 tree_ssa_iv_optimize_init (loops
, &data
);
5198 /* Optimize the loops starting with the innermost ones. */
5199 loop
= loops
->tree_root
;
5203 #ifdef ENABLE_CHECKING
5204 verify_loop_closed_ssa ();
5208 /* Scan the loops, inner ones first. */
5209 while (loop
!= loops
->tree_root
)
5211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5212 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5214 tree_ssa_iv_optimize_loop (&data
, loop
);
5226 #ifdef ENABLE_CHECKING
5227 verify_loop_closed_ssa ();
5231 tree_ssa_iv_optimize_finalize (loops
, &data
);