1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
103 tree base
; /* Initial value of the iv. */
104 tree base_object
; /* A memory object to that the induction variable points. */
105 tree step
; /* Step of the iv (constant only). */
106 tree ssa_name
; /* The ssa name with the value. */
107 bool biv_p
; /* Is it a biv? */
108 bool have_use_for
; /* Do we already have a use for it? */
109 unsigned use_id
; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
115 tree name
; /* The ssa name. */
116 struct iv
*iv
; /* Induction variable description. */
117 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id
; /* Id of an invariant. */
120 bool preserve_biv
; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
126 struct tree_niter_desc niter
;
127 /* Number of iterations. */
129 unsigned regs_used
; /* Number of registers used. */
135 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
136 USE_OUTER
, /* The induction variable is used outside the loop. */
137 USE_ADDRESS
, /* Use in an address. */
138 USE_COMPARE
/* Use is a compare. */
141 /* The candidate - cost pair. */
144 struct iv_cand
*cand
; /* The candidate. */
145 unsigned cost
; /* The cost. */
146 bitmap depends_on
; /* The list of invariants that have to be
153 unsigned id
; /* The id of the use. */
154 enum use_type type
; /* Type of the use. */
155 struct iv
*iv
; /* The induction variable it is based on. */
156 tree stmt
; /* Statement in that it occurs. */
157 tree
*op_p
; /* The place where it occurs. */
158 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
161 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
162 struct cost_pair
*cost_map
;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand
*selected
;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
172 IP_NORMAL
, /* At the end, just before the exit condition. */
173 IP_END
, /* At the end of the latch block. */
174 IP_ORIGINAL
/* The original biv. */
177 /* The induction variable candidate. */
180 unsigned id
; /* The number of the candidate. */
181 bool important
; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos
; /* Where it is computed. */
184 tree incremented_at
; /* For original biv, the statement where it is
186 tree var_before
; /* The variable used for it before increment. */
187 tree var_after
; /* The variable used for it after increment. */
188 struct iv
*iv
; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost
; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
199 /* The currently optimized loop. */
200 struct loop
*current_loop
;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size
;
205 /* The array of information for the ssa names. */
206 struct version_info
*version_info
;
208 /* The bitmap of indices in version_info whose value was changed. */
211 /* The maximum invariant id. */
214 /* The uses of induction variables. */
217 /* The candidates. */
218 varray_type iv_candidates
;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates
;
223 /* Whether to consider just related and important candidates when replacing a
225 bool consider_all_candidates
;
228 /* An assignment of iv candidates to uses. */
232 /* The number of uses covered by the assignment. */
235 /* Number of uses that cannot be expressed by the candidates in the set. */
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair
**cand_for_use
;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses
;
244 /* The candidates used. */
247 /* The number of candidates in the set. */
250 /* Total number of registers needed. */
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost
;
256 /* Total cost of candidates. */
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses
;
262 /* Total cost of the assignment. */
266 /* Difference of two iv candidate assignments. */
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair
*old_cp
;
276 /* A new assignment. */
277 struct cost_pair
*new_cp
;
279 /* Next change in the list. */
280 struct iv_ca_delta
*next_change
;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
303 static varray_type decl_rtl_to_reset
;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data
*data
)
310 return VARRAY_ACTIVE_SIZE (data
->iv_uses
);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use
*
316 iv_use (struct ivopts_data
*data
, unsigned i
)
318 return VARRAY_GENERIC_PTR_NOGC (data
->iv_uses
, i
);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data
*data
)
326 return VARRAY_ACTIVE_SIZE (data
->iv_candidates
);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand
*
332 iv_cand (struct ivopts_data
*data
, unsigned i
)
334 return VARRAY_GENERIC_PTR_NOGC (data
->iv_candidates
, i
);
337 /* The data for LOOP. */
339 static inline struct loop_data
*
340 loop_data (struct loop
*loop
)
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
348 single_dom_exit (struct loop
*loop
)
350 edge exit
= loop
->single_exit
;
355 if (!just_once_each_iteration_p (loop
, exit
->src
))
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv
*);
365 dump_iv (FILE *file
, struct iv
*iv
)
369 fprintf (file
, "ssa name ");
370 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
371 fprintf (file
, "\n");
374 fprintf (file
, " type ");
375 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
376 fprintf (file
, "\n");
380 fprintf (file
, " base ");
381 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
382 fprintf (file
, "\n");
384 fprintf (file
, " step ");
385 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
386 fprintf (file
, "\n");
390 fprintf (file
, " invariant ");
391 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
392 fprintf (file
, "\n");
397 fprintf (file
, " base object ");
398 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
399 fprintf (file
, "\n");
403 fprintf (file
, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use
*);
410 dump_use (FILE *file
, struct iv_use
*use
)
412 fprintf (file
, "use %d\n", use
->id
);
416 case USE_NONLINEAR_EXPR
:
417 fprintf (file
, " generic\n");
421 fprintf (file
, " outside\n");
425 fprintf (file
, " address\n");
429 fprintf (file
, " compare\n");
436 fprintf (file
, " in statement ");
437 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
438 fprintf (file
, "\n");
440 fprintf (file
, " at position ");
442 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
443 fprintf (file
, "\n");
445 dump_iv (file
, use
->iv
);
447 if (use
->related_cands
)
449 fprintf (file
, " related candidates ");
450 dump_bitmap (file
, use
->related_cands
);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data
*);
458 dump_uses (FILE *file
, struct ivopts_data
*data
)
463 for (i
= 0; i
< n_iv_uses (data
); i
++)
465 use
= iv_use (data
, i
);
467 dump_use (file
, use
);
468 fprintf (file
, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand
*);
476 dump_cand (FILE *file
, struct iv_cand
*cand
)
478 struct iv
*iv
= cand
->iv
;
480 fprintf (file
, "candidate %d%s\n",
481 cand
->id
, cand
->important
? " (important)" : "");
485 fprintf (file
, " final value replacement\n");
492 fprintf (file
, " incremented before exit test\n");
496 fprintf (file
, " incremented at end\n");
500 fprintf (file
, " original biv\n");
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info
*
510 ver_info (struct ivopts_data
*data
, unsigned ver
)
512 return data
->version_info
+ ver
;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info
*
518 name_info (struct ivopts_data
*data
, tree name
)
520 return ver_info (data
, SSA_NAME_VERSION (name
));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
527 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
530 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
531 unsigned HOST_WIDE_INT inv
, ex
, val
;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a
& 1) && !(b
& 1))
548 /* If b is still even, a is odd and there is no such x. */
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
556 for (i
= 0; i
< bits
- 1; i
++)
558 inv
= (inv
* ex
) & mask
;
559 ex
= (ex
* ex
) & mask
;
562 val
= (a
* inv
) & mask
;
564 gcc_assert (((val
* b
) & mask
) == a
);
566 if ((val
>> (bits
- 1)) & 1)
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
578 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
580 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
584 if (sbb
== loop
->latch
)
590 return stmt
== last_stmt (bb
);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
597 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
599 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
600 basic_block stmt_bb
= bb_for_stmt (stmt
);
601 block_stmt_iterator bsi
;
603 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
606 if (stmt_bb
!= cand_bb
)
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
613 if (bsi_stmt (bsi
) == cand
->incremented_at
)
615 if (bsi_stmt (bsi
) == stmt
)
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
624 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
632 return stmt_after_ip_normal_pos (loop
, stmt
);
635 return stmt_after_ip_original_pos (cand
, stmt
);
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
646 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
650 data
->version_info_size
= 2 * num_ssa_names
;
651 data
->version_info
= xcalloc (data
->version_info_size
,
652 sizeof (struct version_info
));
653 data
->relevant
= BITMAP_XMALLOC ();
654 data
->important_candidates
= BITMAP_XMALLOC ();
655 data
->max_inv_id
= 0;
657 for (i
= 1; i
< loops
->num
; i
++)
658 if (loops
->parray
[i
])
659 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_uses
, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_candidates
, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset
, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
670 determine_base_object (tree expr
)
672 enum tree_code code
= TREE_CODE (expr
);
673 tree base
, obj
, op0
, op1
;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
684 obj
= TREE_OPERAND (expr
, 0);
685 base
= get_base_address (obj
);
688 return fold_convert (ptr_type_node
, expr
);
690 if (TREE_CODE (base
) == INDIRECT_REF
)
691 return fold_convert (ptr_type_node
, TREE_OPERAND (base
, 0));
693 return fold (build1 (ADDR_EXPR
, ptr_type_node
, base
));
697 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
698 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
704 return (code
== PLUS_EXPR
706 : fold (build1 (NEGATE_EXPR
, ptr_type_node
, op1
)));
708 return fold (build (code
, ptr_type_node
, op0
, op1
));
711 return fold_convert (ptr_type_node
, expr
);
715 /* Allocates an induction variable with given initial value BASE and step STEP
719 alloc_iv (tree base
, tree step
)
721 struct iv
*iv
= xcalloc (1, sizeof (struct iv
));
723 if (step
&& integer_zerop (step
))
727 iv
->base_object
= determine_base_object (base
);
730 iv
->have_use_for
= false;
732 iv
->ssa_name
= NULL_TREE
;
737 /* Sets STEP and BASE for induction variable IV. */
740 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
742 struct version_info
*info
= name_info (data
, iv
);
744 gcc_assert (!info
->iv
);
746 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
747 info
->iv
= alloc_iv (base
, step
);
748 info
->iv
->ssa_name
= iv
;
751 /* Finds induction variable declaration for VAR. */
754 get_iv (struct ivopts_data
*data
, tree var
)
758 if (!name_info (data
, var
)->iv
)
760 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
763 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
764 set_iv (data
, var
, var
, NULL_TREE
);
767 return name_info (data
, var
)->iv
;
770 /* Determines the step of a biv defined in PHI. */
773 determine_biv_step (tree phi
)
775 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
776 tree name
= PHI_RESULT (phi
), base
, step
;
777 tree type
= TREE_TYPE (name
);
779 if (!is_gimple_reg (name
))
782 if (!simple_iv (loop
, phi
, name
, &base
, &step
))
786 return build_int_cst (type
, 0);
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
794 abnormal_ssa_name_p (tree exp
)
799 if (TREE_CODE (exp
) != SSA_NAME
)
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
809 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
810 void *data ATTRIBUTE_UNUSED
)
812 if (TREE_CODE (base
) == ARRAY_REF
)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
816 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
820 return !abnormal_ssa_name_p (*index
);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
827 contains_abnormal_ssa_name_p (tree expr
)
829 enum tree_code code
= TREE_CODE (expr
);
830 enum tree_code_class
class = TREE_CODE_CLASS (code
);
832 if (code
== SSA_NAME
)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
835 if (code
== INTEGER_CST
836 || is_gimple_min_invariant (expr
))
839 if (code
== ADDR_EXPR
)
840 return !for_each_index (&TREE_OPERAND (expr
, 0),
841 idx_contains_abnormal_ssa_name_p
,
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
865 /* Finds basic ivs. */
868 find_bivs (struct ivopts_data
*data
)
870 tree phi
, step
, type
, base
;
872 struct loop
*loop
= data
->current_loop
;
874 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
879 step
= determine_biv_step (phi
);
883 if (cst_and_fits_in_hwi (step
)
884 && int_cst_value (step
) == 0)
887 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
888 if (contains_abnormal_ssa_name_p (base
))
891 type
= TREE_TYPE (PHI_RESULT (phi
));
892 base
= fold_convert (type
, base
);
893 step
= fold_convert (type
, step
);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step
))
900 set_iv (data
, PHI_RESULT (phi
), base
, step
);
907 /* Marks basic ivs. */
910 mark_bivs (struct ivopts_data
*data
)
913 struct iv
*iv
, *incr_iv
;
914 struct loop
*loop
= data
->current_loop
;
917 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
919 iv
= get_iv (data
, PHI_RESULT (phi
));
923 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
924 incr_iv
= get_iv (data
, var
);
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
930 if (incr_bb
->loop_father
!= data
->current_loop
931 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
935 incr_iv
->biv_p
= true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
943 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
,
944 tree
*base
, tree
*step
)
947 struct loop
*loop
= data
->current_loop
;
952 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
955 lhs
= TREE_OPERAND (stmt
, 0);
956 if (TREE_CODE (lhs
) != SSA_NAME
)
959 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), base
, step
))
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
965 && !cst_and_fits_in_hwi (*step
))
968 if (contains_abnormal_ssa_name_p (*base
))
974 /* Finds general ivs in statement STMT. */
977 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
981 if (!find_givs_in_stmt_scev (data
, stmt
, &base
, &step
))
984 set_iv (data
, TREE_OPERAND (stmt
, 0), base
, step
);
987 /* Finds general ivs in basic block BB. */
990 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
992 block_stmt_iterator bsi
;
994 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
995 find_givs_in_stmt (data
, bsi_stmt (bsi
));
998 /* Finds general ivs. */
1001 find_givs (struct ivopts_data
*data
)
1003 struct loop
*loop
= data
->current_loop
;
1004 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1007 for (i
= 0; i
< loop
->num_nodes
; i
++)
1008 find_givs_in_bb (data
, body
[i
]);
1012 /* Determine the number of iterations of the current loop. */
1015 determine_number_of_iterations (struct ivopts_data
*data
)
1017 struct loop
*loop
= data
->current_loop
;
1018 edge exit
= single_dom_exit (loop
);
1023 number_of_iterations_exit (loop
, exit
, &loop_data (loop
)->niter
);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1030 find_induction_variables (struct ivopts_data
*data
)
1033 struct loop
*loop
= data
->current_loop
;
1036 if (!find_bivs (data
))
1041 determine_number_of_iterations (data
);
1043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1045 if (loop_data (loop
)->niter
.niter
)
1047 fprintf (dump_file
, " number of iterations ");
1048 print_generic_expr (dump_file
, loop_data (loop
)->niter
.niter
,
1050 fprintf (dump_file
, "\n");
1052 fprintf (dump_file
, " may be zero if ");
1053 print_generic_expr (dump_file
, loop_data (loop
)->niter
.may_be_zero
,
1055 fprintf (dump_file
, "\n");
1057 fprintf (dump_file
, " bogus unless ");
1058 print_generic_expr (dump_file
, loop_data (loop
)->niter
.assumptions
,
1060 fprintf (dump_file
, "\n");
1061 fprintf (dump_file
, "\n");
1064 fprintf (dump_file
, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1068 if (ver_info (data
, i
)->iv
)
1069 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use
*
1079 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1080 tree stmt
, enum use_type use_type
)
1082 struct iv_use
*use
= xcalloc (1, sizeof (struct iv_use
));
1084 use
->id
= n_iv_uses (data
);
1085 use
->type
= use_type
;
1089 use
->related_cands
= BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1093 iv
->ssa_name
= NULL_TREE
;
1095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1096 dump_use (dump_file
, use
);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_uses
, use
);
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1108 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1111 struct version_info
*info
;
1113 if (TREE_CODE (op
) != SSA_NAME
1114 || !is_gimple_reg (op
))
1117 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1119 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1122 info
= name_info (data
, op
);
1124 info
->has_nonlin_use
|= nonlinear_use
;
1126 info
->inv_id
= ++data
->max_inv_id
;
1127 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1130 /* Checks whether the use OP is interesting and if so, records it
1133 static struct iv_use
*
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data
*data
, tree op
,
1142 if (TREE_CODE (op
) != SSA_NAME
)
1145 iv
= get_iv (data
, op
);
1149 if (iv
->have_use_for
)
1151 use
= iv_use (data
, iv
->use_id
);
1153 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
1154 || use
->type
== USE_OUTER
);
1156 if (type
== USE_NONLINEAR_EXPR
)
1157 use
->type
= USE_NONLINEAR_EXPR
;
1161 if (zero_p (iv
->step
))
1163 record_invariant (data
, op
, true);
1166 iv
->have_use_for
= true;
1168 civ
= xmalloc (sizeof (struct iv
));
1171 stmt
= SSA_NAME_DEF_STMT (op
);
1172 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1173 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1175 use
= record_use (data
, NULL
, civ
, stmt
, type
);
1176 iv
->use_id
= use
->id
;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use
*
1184 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1186 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_NONLINEAR_EXPR
);
1189 /* Records a definition of induction variable OP that is used outside of the
1192 static struct iv_use
*
1193 find_interesting_uses_outer (struct ivopts_data
*data
, tree op
)
1195 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_OUTER
);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1202 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1206 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1208 tree zero
= integer_zero_node
;
1210 const_iv
.step
= NULL_TREE
;
1212 if (integer_zerop (*cond_p
)
1213 || integer_nonzerop (*cond_p
))
1216 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1223 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1224 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1227 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1228 iv0
= get_iv (data
, *op0_p
);
1232 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1233 iv1
= get_iv (data
, *op1_p
);
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1244 find_interesting_uses_op (data
, *op0_p
);
1245 find_interesting_uses_op (data
, *op1_p
);
1249 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1251 /* If both are invariants, this is a work for unswitching. */
1255 civ
= xmalloc (sizeof (struct iv
));
1256 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1257 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1264 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1269 if (is_gimple_min_invariant (expr
))
1272 if (TREE_CODE (expr
) == SSA_NAME
)
1274 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1276 && flow_bb_inside_loop_p (loop
, def_bb
))
1285 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1286 for (i
= 0; i
< len
; i
++)
1287 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data
*ivopts_data
;
1305 idx_find_step (tree base
, tree
*idx
, void *data
)
1307 struct ifs_ivopts_data
*dta
= data
;
1309 tree step
, type
, iv_type
, iv_step
, lbound
, off
;
1310 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1312 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1316 /* If base is a component ref, require that the offset of the reference
1318 if (TREE_CODE (base
) == COMPONENT_REF
)
1320 off
= component_ref_field_offset (base
);
1321 return expr_invariant_in_loop_p (loop
, off
);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base
) == ARRAY_REF
)
1330 step
= array_ref_element_size (base
);
1331 lbound
= array_ref_low_bound (base
);
1333 if (!expr_invariant_in_loop_p (loop
, step
)
1334 || !expr_invariant_in_loop_p (loop
, lbound
))
1338 if (TREE_CODE (*idx
) != SSA_NAME
)
1341 iv
= get_iv (dta
->ivopts_data
, *idx
);
1350 iv_type
= TREE_TYPE (iv
->base
);
1351 type
= build_pointer_type (TREE_TYPE (base
));
1352 if (TREE_CODE (base
) == ARRAY_REF
)
1354 step
= array_ref_element_size (base
);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step
) != INTEGER_CST
)
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step
= build_int_cst (type
, 1);
1364 if (TYPE_PRECISION (iv_type
) < TYPE_PRECISION (type
))
1365 iv_step
= can_count_iv_in_wider_type (dta
->ivopts_data
->current_loop
,
1366 type
, iv
->base
, iv
->step
, dta
->stmt
);
1368 iv_step
= fold_convert (iv_type
, iv
->step
);
1372 /* The index might wrap. */
1376 step
= fold_binary_to_constant (MULT_EXPR
, type
, step
, iv_step
);
1379 *dta
->step_p
= step
;
1381 *dta
->step_p
= fold_binary_to_constant (PLUS_EXPR
, type
,
1382 *dta
->step_p
, step
);
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1391 idx_record_use (tree base
, tree
*idx
,
1394 find_interesting_uses_op (data
, *idx
);
1395 if (TREE_CODE (base
) == ARRAY_REF
)
1397 find_interesting_uses_op (data
, array_ref_element_size (base
));
1398 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1403 /* Finds addresses in *OP_P inside STMT. */
1406 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1408 tree base
= unshare_expr (*op_p
), step
= NULL
;
1410 struct ifs_ivopts_data ifs_ivopts_data
;
1412 /* Ignore bitfields for now. Not really something terribly complicated
1414 if (TREE_CODE (base
) == COMPONENT_REF
1415 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1418 ifs_ivopts_data
.ivopts_data
= data
;
1419 ifs_ivopts_data
.stmt
= stmt
;
1420 ifs_ivopts_data
.step_p
= &step
;
1421 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1425 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1426 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1428 if (TREE_CODE (base
) == INDIRECT_REF
)
1429 base
= TREE_OPERAND (base
, 0);
1431 base
= build_addr (base
);
1433 civ
= alloc_iv (base
, step
);
1434 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1438 for_each_index (op_p
, idx_record_use
, data
);
1441 /* Finds and records invariants used in STMT. */
1444 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1446 use_optype uses
= NULL
;
1450 if (TREE_CODE (stmt
) == PHI_NODE
)
1451 n
= PHI_NUM_ARGS (stmt
);
1454 get_stmt_operands (stmt
);
1455 uses
= STMT_USE_OPS (stmt
);
1456 n
= NUM_USES (uses
);
1459 for (i
= 0; i
< n
; i
++)
1461 if (TREE_CODE (stmt
) == PHI_NODE
)
1462 op
= PHI_ARG_DEF (stmt
, i
);
1464 op
= USE_OP (uses
, i
);
1466 record_invariant (data
, op
, false);
1470 /* Finds interesting uses of induction variables in the statement STMT. */
1473 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1477 use_optype uses
= NULL
;
1480 find_invariants_stmt (data
, stmt
);
1482 if (TREE_CODE (stmt
) == COND_EXPR
)
1484 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1488 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1490 lhs
= TREE_OPERAND (stmt
, 0);
1491 rhs
= TREE_OPERAND (stmt
, 1);
1493 if (TREE_CODE (lhs
) == SSA_NAME
)
1495 /* If the statement defines an induction variable, the uses are not
1496 interesting by themselves. */
1498 iv
= get_iv (data
, lhs
);
1500 if (iv
&& !zero_p (iv
->step
))
1504 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1506 case tcc_comparison
:
1507 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1511 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1512 if (REFERENCE_CLASS_P (lhs
))
1513 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1519 if (REFERENCE_CLASS_P (lhs
)
1520 && is_gimple_val (rhs
))
1522 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1523 find_interesting_uses_op (data
, rhs
);
1527 /* TODO -- we should also handle address uses of type
1529 memory = call (whatever);
1536 if (TREE_CODE (stmt
) == PHI_NODE
1537 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1539 lhs
= PHI_RESULT (stmt
);
1540 iv
= get_iv (data
, lhs
);
1542 if (iv
&& !zero_p (iv
->step
))
1546 if (TREE_CODE (stmt
) == PHI_NODE
)
1547 n
= PHI_NUM_ARGS (stmt
);
1550 uses
= STMT_USE_OPS (stmt
);
1551 n
= NUM_USES (uses
);
1554 for (i
= 0; i
< n
; i
++)
1556 if (TREE_CODE (stmt
) == PHI_NODE
)
1557 op
= PHI_ARG_DEF (stmt
, i
);
1559 op
= USE_OP (uses
, i
);
1561 if (TREE_CODE (op
) != SSA_NAME
)
1564 iv
= get_iv (data
, op
);
1568 find_interesting_uses_op (data
, op
);
1572 /* Finds interesting uses of induction variables outside of loops
1573 on loop exit edge EXIT. */
1576 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1580 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1582 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1583 find_interesting_uses_outer (data
, def
);
1587 /* Finds uses of the induction variables that are interesting. */
1590 find_interesting_uses (struct ivopts_data
*data
)
1593 block_stmt_iterator bsi
;
1595 basic_block
*body
= get_loop_body (data
->current_loop
);
1597 struct version_info
*info
;
1600 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1601 fprintf (dump_file
, "Uses:\n\n");
1603 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1608 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1609 if (e
->dest
!= EXIT_BLOCK_PTR
1610 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1611 find_interesting_uses_outside (data
, e
);
1613 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1614 find_interesting_uses_stmt (data
, phi
);
1615 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1616 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1619 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1623 fprintf (dump_file
, "\n");
1625 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1627 info
= ver_info (data
, i
);
1630 fprintf (dump_file
, " ");
1631 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1632 fprintf (dump_file
, " is invariant (%d)%s\n",
1633 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1637 fprintf (dump_file
, "\n");
1643 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1644 position to POS. If USE is not NULL, the candidate is set as related to
1645 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1646 replacement of the final value of the iv by a direct computation. */
1648 static struct iv_cand
*
1649 add_candidate_1 (struct ivopts_data
*data
,
1650 tree base
, tree step
, bool important
, enum iv_position pos
,
1651 struct iv_use
*use
, tree incremented_at
)
1654 struct iv_cand
*cand
= NULL
;
1659 type
= TREE_TYPE (base
);
1660 if (!TYPE_UNSIGNED (type
))
1662 type
= unsigned_type_for (type
);
1663 base
= fold_convert (type
, base
);
1665 step
= fold_convert (type
, step
);
1669 for (i
= 0; i
< n_iv_cands (data
); i
++)
1671 cand
= iv_cand (data
, i
);
1673 if (cand
->pos
!= pos
)
1676 if (cand
->incremented_at
!= incremented_at
)
1690 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1693 if (zero_p (cand
->iv
->step
))
1700 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1705 if (i
== n_iv_cands (data
))
1707 cand
= xcalloc (1, sizeof (struct iv_cand
));
1713 cand
->iv
= alloc_iv (base
, step
);
1716 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1718 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
1719 cand
->var_after
= cand
->var_before
;
1721 cand
->important
= important
;
1722 cand
->incremented_at
= incremented_at
;
1723 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_candidates
, cand
);
1725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1726 dump_cand (dump_file
, cand
);
1729 if (important
&& !cand
->important
)
1731 cand
->important
= true;
1732 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1733 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
1738 bitmap_set_bit (use
->related_cands
, i
);
1739 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1740 fprintf (dump_file
, "Candidate %d is related to use %d\n",
1747 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1748 position to POS. If USE is not NULL, the candidate is set as related to
1749 it. The candidate computation is scheduled on all available positions. */
1752 add_candidate (struct ivopts_data
*data
,
1753 tree base
, tree step
, bool important
, struct iv_use
*use
)
1755 if (ip_normal_pos (data
->current_loop
))
1756 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
1757 if (ip_end_pos (data
->current_loop
))
1758 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
1761 /* Adds standard iv candidates. */
1764 add_standard_iv_candidates (struct ivopts_data
*data
)
1766 /* Add 0 + 1 * iteration candidate. */
1767 add_candidate (data
,
1768 build_int_cst (unsigned_intSI_type_node
, 0),
1769 build_int_cst (unsigned_intSI_type_node
, 1),
1772 /* The same for a long type if it is still fast enough. */
1773 if (BITS_PER_WORD
> 32)
1774 add_candidate (data
,
1775 build_int_cst (unsigned_intDI_type_node
, 0),
1776 build_int_cst (unsigned_intDI_type_node
, 1),
1781 /* Adds candidates bases on the old induction variable IV. */
1784 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
1787 struct iv_cand
*cand
;
1789 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
1791 /* The same, but with initial value zero. */
1792 add_candidate (data
,
1793 build_int_cst (TREE_TYPE (iv
->base
), 0),
1794 iv
->step
, true, NULL
);
1796 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
1797 if (TREE_CODE (phi
) == PHI_NODE
)
1799 /* Additionally record the possibility of leaving the original iv
1801 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
1802 cand
= add_candidate_1 (data
,
1803 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
1804 SSA_NAME_DEF_STMT (def
));
1805 cand
->var_before
= iv
->ssa_name
;
1806 cand
->var_after
= def
;
1810 /* Adds candidates based on the old induction variables. */
1813 add_old_ivs_candidates (struct ivopts_data
*data
)
1819 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1821 iv
= ver_info (data
, i
)->iv
;
1822 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
1823 add_old_iv_candidates (data
, iv
);
1827 /* Adds candidates based on the value of the induction variable IV and USE. */
1830 add_iv_value_candidates (struct ivopts_data
*data
,
1831 struct iv
*iv
, struct iv_use
*use
)
1833 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
1835 /* The same, but with initial value zero. */
1836 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
1837 iv
->step
, false, use
);
1840 /* Adds candidates based on the address IV and USE. */
1843 add_address_candidates (struct ivopts_data
*data
,
1844 struct iv
*iv
, struct iv_use
*use
)
1846 tree base
, abase
, tmp
, *act
;
1848 /* First, the trivial choices. */
1849 add_iv_value_candidates (data
, iv
, use
);
1851 /* Second, try removing the COMPONENT_REFs. */
1852 if (TREE_CODE (iv
->base
) == ADDR_EXPR
)
1854 base
= TREE_OPERAND (iv
->base
, 0);
1855 while (TREE_CODE (base
) == COMPONENT_REF
1856 || (TREE_CODE (base
) == ARRAY_REF
1857 && TREE_CODE (TREE_OPERAND (base
, 1)) == INTEGER_CST
))
1858 base
= TREE_OPERAND (base
, 0);
1860 if (base
!= TREE_OPERAND (iv
->base
, 0))
1862 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1863 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1865 if (TREE_CODE (base
) == INDIRECT_REF
)
1866 base
= TREE_OPERAND (base
, 0);
1868 base
= build_addr (base
);
1869 add_candidate (data
, base
, iv
->step
, false, use
);
1873 /* Third, try removing the constant offset. */
1875 while (TREE_CODE (abase
) == PLUS_EXPR
1876 && TREE_CODE (TREE_OPERAND (abase
, 1)) != INTEGER_CST
)
1877 abase
= TREE_OPERAND (abase
, 0);
1878 /* We found the offset, so make the copy of the non-shared part and
1880 if (TREE_CODE (abase
) == PLUS_EXPR
)
1885 for (tmp
= iv
->base
; tmp
!= abase
; tmp
= TREE_OPERAND (tmp
, 0))
1887 *act
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
),
1888 NULL_TREE
, TREE_OPERAND (tmp
, 1));
1889 act
= &TREE_OPERAND (*act
, 0);
1891 *act
= TREE_OPERAND (tmp
, 0);
1893 add_candidate (data
, base
, iv
->step
, false, use
);
1897 /* Possibly adds pseudocandidate for replacing the final value of USE by
1898 a direct computation. */
1901 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
1903 struct tree_niter_desc
*niter
;
1904 struct loop
*loop
= data
->current_loop
;
1906 /* We must know where we exit the loop and how many times does it roll. */
1907 if (!single_dom_exit (loop
))
1910 niter
= &loop_data (loop
)->niter
;
1912 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
1913 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
1916 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
1919 /* Adds candidates based on the uses. */
1922 add_derived_ivs_candidates (struct ivopts_data
*data
)
1926 for (i
= 0; i
< n_iv_uses (data
); i
++)
1928 struct iv_use
*use
= iv_use (data
, i
);
1935 case USE_NONLINEAR_EXPR
:
1937 /* Just add the ivs based on the value of the iv used here. */
1938 add_iv_value_candidates (data
, use
->iv
, use
);
1942 add_iv_value_candidates (data
, use
->iv
, use
);
1944 /* Additionally, add the pseudocandidate for the possibility to
1945 replace the final value by a direct computation. */
1946 add_iv_outer_candidates (data
, use
);
1950 add_address_candidates (data
, use
->iv
, use
);
1959 /* Record important candidates and add them to related_cands bitmaps
1963 record_important_candidates (struct ivopts_data
*data
)
1968 for (i
= 0; i
< n_iv_cands (data
); i
++)
1970 struct iv_cand
*cand
= iv_cand (data
, i
);
1972 if (cand
->important
)
1973 bitmap_set_bit (data
->important_candidates
, i
);
1976 data
->consider_all_candidates
= (n_iv_cands (data
)
1977 <= CONSIDER_ALL_CANDIDATES_BOUND
);
1979 if (data
->consider_all_candidates
)
1981 /* We will not need "related_cands" bitmaps in this case,
1982 so release them to decrease peak memory consumption. */
1983 for (i
= 0; i
< n_iv_uses (data
); i
++)
1985 use
= iv_use (data
, i
);
1986 BITMAP_XFREE (use
->related_cands
);
1991 /* Add important candidates to the related_cands bitmaps. */
1992 for (i
= 0; i
< n_iv_uses (data
); i
++)
1993 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
1994 data
->important_candidates
);
1998 /* Finds the candidates for the induction variables. */
2001 find_iv_candidates (struct ivopts_data
*data
)
2003 /* Add commonly used ivs. */
2004 add_standard_iv_candidates (data
);
2006 /* Add old induction variables. */
2007 add_old_ivs_candidates (data
);
2009 /* Add induction variables derived from uses. */
2010 add_derived_ivs_candidates (data
);
2012 /* Record the important candidates. */
2013 record_important_candidates (data
);
2016 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2017 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2018 we allocate a simple list to every use. */
2021 alloc_use_cost_map (struct ivopts_data
*data
)
2023 unsigned i
, size
, s
, j
;
2025 for (i
= 0; i
< n_iv_uses (data
); i
++)
2027 struct iv_use
*use
= iv_use (data
, i
);
2030 if (data
->consider_all_candidates
)
2031 size
= n_iv_cands (data
);
2035 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2040 /* Round up to the power of two, so that moduling by it is fast. */
2041 for (size
= 1; size
< s
; size
<<= 1)
2045 use
->n_map_members
= size
;
2046 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2050 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2051 on invariants DEPENDS_ON. */
2054 set_use_iv_cost (struct ivopts_data
*data
,
2055 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2062 BITMAP_XFREE (depends_on
);
2066 if (data
->consider_all_candidates
)
2068 use
->cost_map
[cand
->id
].cand
= cand
;
2069 use
->cost_map
[cand
->id
].cost
= cost
;
2070 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2074 /* n_map_members is a power of two, so this computes modulo. */
2075 s
= cand
->id
& (use
->n_map_members
- 1);
2076 for (i
= s
; i
< use
->n_map_members
; i
++)
2077 if (!use
->cost_map
[i
].cand
)
2079 for (i
= 0; i
< s
; i
++)
2080 if (!use
->cost_map
[i
].cand
)
2086 use
->cost_map
[i
].cand
= cand
;
2087 use
->cost_map
[i
].cost
= cost
;
2088 use
->cost_map
[i
].depends_on
= depends_on
;
2091 /* Gets cost of (USE, CANDIDATE) pair. */
2093 static struct cost_pair
*
2094 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2095 struct iv_cand
*cand
)
2098 struct cost_pair
*ret
;
2103 if (data
->consider_all_candidates
)
2105 ret
= use
->cost_map
+ cand
->id
;
2112 /* n_map_members is a power of two, so this computes modulo. */
2113 s
= cand
->id
& (use
->n_map_members
- 1);
2114 for (i
= s
; i
< use
->n_map_members
; i
++)
2115 if (use
->cost_map
[i
].cand
== cand
)
2116 return use
->cost_map
+ i
;
2118 for (i
= 0; i
< s
; i
++)
2119 if (use
->cost_map
[i
].cand
== cand
)
2120 return use
->cost_map
+ i
;
2125 /* Returns estimate on cost of computing SEQ. */
2133 for (; seq
; seq
= NEXT_INSN (seq
))
2135 set
= single_set (seq
);
2137 cost
+= rtx_cost (set
, SET
);
2145 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2147 produce_memory_decl_rtl (tree obj
, int *regno
)
2152 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2154 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2155 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2158 x
= gen_raw_REG (Pmode
, (*regno
)++);
2160 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2163 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2164 walk_tree. DATA contains the actual fake register number. */
2167 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2169 tree obj
= NULL_TREE
;
2173 switch (TREE_CODE (*expr_p
))
2176 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2177 handled_component_p (*expr_p
);
2178 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2182 x
= produce_memory_decl_rtl (obj
, regno
);
2187 obj
= SSA_NAME_VAR (*expr_p
);
2188 if (!DECL_RTL_SET_P (obj
))
2189 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2198 if (DECL_RTL_SET_P (obj
))
2201 if (DECL_MODE (obj
) == BLKmode
)
2202 x
= produce_memory_decl_rtl (obj
, regno
);
2204 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2214 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset
, obj
);
2215 SET_DECL_RTL (obj
, x
);
2221 /* Determines cost of the computation of EXPR. */
2224 computation_cost (tree expr
)
2227 tree type
= TREE_TYPE (expr
);
2231 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2233 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2237 cost
= seq_cost (seq
);
2238 if (GET_CODE (rslt
) == MEM
)
2239 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2244 /* Returns variable containing the value of candidate CAND at statement AT. */
2247 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2249 if (stmt_after_increment (loop
, cand
, stmt
))
2250 return cand
->var_after
;
2252 return cand
->var_before
;
2255 /* Determines the expression by that USE is expressed from induction variable
2256 CAND at statement AT in LOOP. */
2259 get_computation_at (struct loop
*loop
,
2260 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
2262 tree ubase
= use
->iv
->base
;
2263 tree ustep
= use
->iv
->step
;
2264 tree cbase
= cand
->iv
->base
;
2265 tree cstep
= cand
->iv
->step
;
2266 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2270 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2271 HOST_WIDE_INT ratioi
;
2273 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2275 /* We do not have a precision to express the values of use. */
2279 expr
= var_at_stmt (loop
, cand
, at
);
2281 if (TREE_TYPE (expr
) != ctype
)
2283 /* This may happen with the original ivs. */
2284 expr
= fold_convert (ctype
, expr
);
2287 if (TYPE_UNSIGNED (utype
))
2291 uutype
= unsigned_type_for (utype
);
2292 ubase
= fold_convert (uutype
, ubase
);
2293 ustep
= fold_convert (uutype
, ustep
);
2296 if (uutype
!= ctype
)
2298 expr
= fold_convert (uutype
, expr
);
2299 cbase
= fold_convert (uutype
, cbase
);
2300 cstep
= fold_convert (uutype
, cstep
);
2303 if (!cst_and_fits_in_hwi (cstep
)
2304 || !cst_and_fits_in_hwi (ustep
))
2307 ustepi
= int_cst_value (ustep
);
2308 cstepi
= int_cst_value (cstep
);
2310 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2312 /* TODO maybe consider case when ustep divides cstep and the ratio is
2313 a power of 2 (so that the division is fast to execute)? We would
2314 need to be much more careful with overflows etc. then. */
2318 /* We may need to shift the value if we are after the increment. */
2319 if (stmt_after_increment (loop
, cand
, at
))
2320 cbase
= fold (build2 (PLUS_EXPR
, uutype
, cbase
, cstep
));
2322 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2323 or |ratio| == 1, it is better to handle this like
2325 ubase - ratio * cbase + ratio * var. */
2329 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, cbase
));
2330 expr
= fold (build2 (PLUS_EXPR
, uutype
, expr
, delta
));
2332 else if (ratioi
== -1)
2334 delta
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, cbase
));
2335 expr
= fold (build2 (MINUS_EXPR
, uutype
, delta
, expr
));
2337 else if (TREE_CODE (cbase
) == INTEGER_CST
)
2339 ratio
= build_int_cst_type (uutype
, ratioi
);
2340 delta
= fold (build2 (MULT_EXPR
, uutype
, ratio
, cbase
));
2341 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, delta
));
2342 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2343 expr
= fold (build2 (PLUS_EXPR
, uutype
, delta
, expr
));
2347 expr
= fold (build2 (MINUS_EXPR
, uutype
, expr
, cbase
));
2348 ratio
= build_int_cst_type (uutype
, ratioi
);
2349 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2350 expr
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, expr
));
2353 return fold_convert (utype
, expr
);
2356 /* Determines the expression by that USE is expressed from induction variable
2360 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2362 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2365 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2368 strip_offset (tree
*expr
, unsigned HOST_WIDE_INT
*offset
)
2371 enum tree_code code
;
2375 if (cst_and_fits_in_hwi (*expr
))
2377 *offset
+= int_cst_value (*expr
);
2378 *expr
= integer_zero_node
;
2382 code
= TREE_CODE (*expr
);
2384 if (code
!= PLUS_EXPR
&& code
!= MINUS_EXPR
)
2387 op0
= TREE_OPERAND (*expr
, 0);
2388 op1
= TREE_OPERAND (*expr
, 1);
2390 if (cst_and_fits_in_hwi (op1
))
2392 if (code
== PLUS_EXPR
)
2393 *offset
+= int_cst_value (op1
);
2395 *offset
-= int_cst_value (op1
);
2401 if (code
!= PLUS_EXPR
)
2404 if (!cst_and_fits_in_hwi (op0
))
2407 *offset
+= int_cst_value (op0
);
2412 /* Returns cost of addition in MODE. */
2415 add_cost (enum machine_mode mode
)
2417 static unsigned costs
[NUM_MACHINE_MODES
];
2425 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2426 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
),
2427 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
+ 1)),
2432 cost
= seq_cost (seq
);
2438 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2439 fprintf (dump_file
, "Addition in %s costs %d\n",
2440 GET_MODE_NAME (mode
), cost
);
2444 /* Entry in a hashtable of already known costs for multiplication. */
2447 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2448 enum machine_mode mode
; /* In mode. */
2449 unsigned cost
; /* The cost. */
2452 /* Counts hash value for the ENTRY. */
2455 mbc_entry_hash (const void *entry
)
2457 const struct mbc_entry
*e
= entry
;
2459 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2462 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2465 mbc_entry_eq (const void *entry1
, const void *entry2
)
2467 const struct mbc_entry
*e1
= entry1
;
2468 const struct mbc_entry
*e2
= entry2
;
2470 return (e1
->mode
== e2
->mode
2471 && e1
->cst
== e2
->cst
);
2474 /* Returns cost of multiplication by constant CST in MODE. */
2477 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
2479 static htab_t costs
;
2480 struct mbc_entry
**cached
, act
;
2485 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2489 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2491 return (*cached
)->cost
;
2493 *cached
= xmalloc (sizeof (struct mbc_entry
));
2494 (*cached
)->mode
= mode
;
2495 (*cached
)->cst
= cst
;
2498 expand_mult (mode
, gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
), GEN_INT (cst
),
2503 cost
= seq_cost (seq
);
2505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2506 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2507 (int) cst
, GET_MODE_NAME (mode
), cost
);
2509 (*cached
)->cost
= cost
;
2514 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2515 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2516 variable is omitted. The created memory accesses MODE.
2518 TODO -- there must be some better way. This all is quite crude. */
2521 get_address_cost (bool symbol_present
, bool var_present
,
2522 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
2524 #define MAX_RATIO 128
2525 static sbitmap valid_mult
;
2526 static HOST_WIDE_INT rat
, off
;
2527 static HOST_WIDE_INT min_offset
, max_offset
;
2528 static unsigned costs
[2][2][2][2];
2529 unsigned cost
, acost
;
2530 rtx seq
, addr
, base
;
2531 bool offset_p
, ratio_p
;
2533 HOST_WIDE_INT s_offset
;
2534 unsigned HOST_WIDE_INT mask
;
2541 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2543 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
2544 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2546 XEXP (addr
, 1) = GEN_INT (i
);
2547 if (!memory_address_p (Pmode
, addr
))
2550 max_offset
= i
>> 1;
2553 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2555 XEXP (addr
, 1) = GEN_INT (-i
);
2556 if (!memory_address_p (Pmode
, addr
))
2559 min_offset
= -(i
>> 1);
2561 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2563 fprintf (dump_file
, "get_address_cost:\n");
2564 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
2565 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
2568 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
2569 sbitmap_zero (valid_mult
);
2571 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2572 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2574 XEXP (addr
, 1) = GEN_INT (i
);
2575 if (memory_address_p (Pmode
, addr
))
2577 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
2582 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2584 fprintf (dump_file
, " allowed multipliers:");
2585 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2586 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
2587 fprintf (dump_file
, " %d", (int) i
);
2588 fprintf (dump_file
, "\n");
2589 fprintf (dump_file
, "\n");
2593 bits
= GET_MODE_BITSIZE (Pmode
);
2594 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
2596 if ((offset
>> (bits
- 1) & 1))
2601 offset_p
= (s_offset
!= 0
2602 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
2603 ratio_p
= (ratio
!= 1
2604 && -MAX_RATIO
<= ratio
&& ratio
<= MAX_RATIO
2605 && TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
));
2607 if (ratio
!= 1 && !ratio_p
)
2608 cost
+= multiply_by_cost (ratio
, Pmode
);
2610 if (s_offset
&& !offset_p
&& !symbol_present
)
2612 cost
+= add_cost (Pmode
);
2616 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
2621 addr
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2622 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
+ 1);
2624 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, GEN_INT (rat
));
2627 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
2631 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
2633 base
= gen_rtx_fmt_e (CONST
, Pmode
,
2634 gen_rtx_fmt_ee (PLUS
, Pmode
,
2639 base
= GEN_INT (off
);
2644 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
2647 addr
= memory_address (Pmode
, addr
);
2651 acost
= seq_cost (seq
);
2652 acost
+= address_cost (addr
, Pmode
);
2656 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
2659 return cost
+ acost
;
2662 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2663 the bitmap to that we should store it. */
2665 static struct ivopts_data
*fd_ivopts_data
;
2667 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2669 bitmap
*depends_on
= data
;
2670 struct version_info
*info
;
2672 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2674 info
= name_info (fd_ivopts_data
, *expr_p
);
2676 if (!info
->inv_id
|| info
->has_nonlin_use
)
2680 *depends_on
= BITMAP_XMALLOC ();
2681 bitmap_set_bit (*depends_on
, info
->inv_id
);
2686 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2687 invariants the computation depends on. */
2690 force_var_cost (struct ivopts_data
*data
,
2691 tree expr
, bitmap
*depends_on
)
2693 static bool costs_initialized
= false;
2694 static unsigned integer_cost
;
2695 static unsigned symbol_cost
;
2696 static unsigned address_cost
;
2698 unsigned cost0
, cost1
, cost
;
2699 enum machine_mode mode
;
2701 if (!costs_initialized
)
2703 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
2704 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
2705 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
2707 tree type
= build_pointer_type (integer_type_node
);
2709 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
2712 SET_DECL_RTL (var
, x
);
2713 TREE_STATIC (var
) = 1;
2714 addr
= build1 (ADDR_EXPR
, type
, var
);
2715 symbol_cost
= computation_cost (addr
) + 1;
2718 = computation_cost (build2 (PLUS_EXPR
, type
,
2720 build_int_cst_type (type
, 2000))) + 1;
2721 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2723 fprintf (dump_file
, "force_var_cost:\n");
2724 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
2725 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
2726 fprintf (dump_file
, " address %d\n", (int) address_cost
);
2727 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
2728 fprintf (dump_file
, "\n");
2731 costs_initialized
= true;
2736 fd_ivopts_data
= data
;
2737 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
2740 if (SSA_VAR_P (expr
))
2743 if (TREE_INVARIANT (expr
))
2745 if (TREE_CODE (expr
) == INTEGER_CST
)
2746 return integer_cost
;
2748 if (TREE_CODE (expr
) == ADDR_EXPR
)
2750 tree obj
= TREE_OPERAND (expr
, 0);
2752 if (TREE_CODE (obj
) == VAR_DECL
2753 || TREE_CODE (obj
) == PARM_DECL
2754 || TREE_CODE (obj
) == RESULT_DECL
)
2758 return address_cost
;
2761 switch (TREE_CODE (expr
))
2766 op0
= TREE_OPERAND (expr
, 0);
2767 op1
= TREE_OPERAND (expr
, 1);
2769 if (is_gimple_val (op0
))
2772 cost0
= force_var_cost (data
, op0
, NULL
);
2774 if (is_gimple_val (op1
))
2777 cost1
= force_var_cost (data
, op1
, NULL
);
2782 /* Just an arbitrary value, FIXME. */
2783 return target_spill_cost
;
2786 mode
= TYPE_MODE (TREE_TYPE (expr
));
2787 switch (TREE_CODE (expr
))
2791 cost
= add_cost (mode
);
2795 if (cst_and_fits_in_hwi (op0
))
2796 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
2797 else if (cst_and_fits_in_hwi (op1
))
2798 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
2800 return target_spill_cost
;
2810 /* Bound the cost by target_spill_cost. The parts of complicated
2811 computations often are either loop invariant or at least can
2812 be shared between several iv uses, so letting this grow without
2813 limits would not give reasonable results. */
2814 return cost
< target_spill_cost
? cost
: target_spill_cost
;
2817 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2818 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2819 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2820 invariants the computation depends on. */
2823 split_address_cost (struct ivopts_data
*data
,
2824 tree addr
, bool *symbol_present
, bool *var_present
,
2825 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2828 HOST_WIDE_INT bitsize
;
2829 HOST_WIDE_INT bitpos
;
2831 enum machine_mode mode
;
2832 int unsignedp
, volatilep
;
2834 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
2835 &unsignedp
, &volatilep
, false);
2838 || bitpos
% BITS_PER_UNIT
!= 0
2839 || TREE_CODE (core
) != VAR_DECL
)
2841 *symbol_present
= false;
2842 *var_present
= true;
2843 fd_ivopts_data
= data
;
2844 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
2845 return target_spill_cost
;
2848 *offset
+= bitpos
/ BITS_PER_UNIT
;
2849 if (TREE_STATIC (core
)
2850 || DECL_EXTERNAL (core
))
2852 *symbol_present
= true;
2853 *var_present
= false;
2857 *symbol_present
= false;
2858 *var_present
= true;
2862 /* Estimates cost of expressing difference of addresses E1 - E2 as
2863 var + symbol + offset. The value of offset is added to OFFSET,
2864 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2865 part is missing. DEPENDS_ON is a set of the invariants the computation
2869 ptr_difference_cost (struct ivopts_data
*data
,
2870 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2871 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2873 HOST_WIDE_INT diff
= 0;
2876 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
2878 if (ptr_difference_const (e1
, e2
, &diff
))
2881 *symbol_present
= false;
2882 *var_present
= false;
2886 if (e2
== integer_zero_node
)
2887 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
2888 symbol_present
, var_present
, offset
, depends_on
);
2890 *symbol_present
= false;
2891 *var_present
= true;
2893 cost
= force_var_cost (data
, e1
, depends_on
);
2894 cost
+= force_var_cost (data
, e2
, depends_on
);
2895 cost
+= add_cost (Pmode
);
2900 /* Estimates cost of expressing difference E1 - E2 as
2901 var + symbol + offset. The value of offset is added to OFFSET,
2902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2903 part is missing. DEPENDS_ON is a set of the invariants the computation
2907 difference_cost (struct ivopts_data
*data
,
2908 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2909 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2912 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
2914 strip_offset (&e1
, offset
);
2916 strip_offset (&e2
, offset
);
2919 if (TREE_CODE (e1
) == ADDR_EXPR
)
2920 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
2922 *symbol_present
= false;
2924 if (operand_equal_p (e1
, e2
, 0))
2926 *var_present
= false;
2929 *var_present
= true;
2931 return force_var_cost (data
, e1
, depends_on
);
2935 cost
= force_var_cost (data
, e2
, depends_on
);
2936 cost
+= multiply_by_cost (-1, mode
);
2941 cost
= force_var_cost (data
, e1
, depends_on
);
2942 cost
+= force_var_cost (data
, e2
, depends_on
);
2943 cost
+= add_cost (mode
);
2948 /* Determines the cost of the computation by that USE is expressed
2949 from induction variable CAND. If ADDRESS_P is true, we just need
2950 to create an address from it, otherwise we want to get it into
2951 register. A set of invariants we depend on is stored in
2952 DEPENDS_ON. AT is the statement at that the value is computed. */
2955 get_computation_cost_at (struct ivopts_data
*data
,
2956 struct iv_use
*use
, struct iv_cand
*cand
,
2957 bool address_p
, bitmap
*depends_on
, tree at
)
2959 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
2961 tree utype
= TREE_TYPE (ubase
), ctype
;
2962 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
2963 HOST_WIDE_INT ratio
, aratio
;
2964 bool var_present
, symbol_present
;
2965 unsigned cost
= 0, n_sums
;
2969 /* Only consider real candidates. */
2973 cbase
= cand
->iv
->base
;
2974 cstep
= cand
->iv
->step
;
2975 ctype
= TREE_TYPE (cbase
);
2977 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2979 /* We do not have a precision to express the values of use. */
2985 /* Do not try to express address of an object with computation based
2986 on address of a different object. This may cause problems in rtl
2987 level alias analysis (that does not expect this to be happening,
2988 as this is illegal in C), and would be unlikely to be useful
2990 if (use
->iv
->base_object
2991 && cand
->iv
->base_object
2992 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
2996 if (!cst_and_fits_in_hwi (ustep
)
2997 || !cst_and_fits_in_hwi (cstep
))
3000 if (TREE_CODE (ubase
) == INTEGER_CST
3001 && !cst_and_fits_in_hwi (ubase
))
3004 if (TREE_CODE (cbase
) == INTEGER_CST
3005 && !cst_and_fits_in_hwi (cbase
))
3008 ustepi
= int_cst_value (ustep
);
3009 cstepi
= int_cst_value (cstep
);
3011 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3013 /* TODO -- add direct handling of this case. */
3017 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3020 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3021 or ratio == 1, it is better to handle this like
3023 ubase - ratio * cbase + ratio * var
3025 (also holds in the case ratio == -1, TODO. */
3027 if (TREE_CODE (cbase
) == INTEGER_CST
)
3029 offset
= - ratio
* int_cst_value (cbase
);
3030 cost
+= difference_cost (data
,
3031 ubase
, integer_zero_node
,
3032 &symbol_present
, &var_present
, &offset
,
3035 else if (ratio
== 1)
3037 cost
+= difference_cost (data
,
3039 &symbol_present
, &var_present
, &offset
,
3044 cost
+= force_var_cost (data
, cbase
, depends_on
);
3045 cost
+= add_cost (TYPE_MODE (ctype
));
3046 cost
+= difference_cost (data
,
3047 ubase
, integer_zero_node
,
3048 &symbol_present
, &var_present
, &offset
,
3052 /* If we are after the increment, the value of the candidate is higher by
3054 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3055 offset
-= ratio
* cstepi
;
3057 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3058 (symbol/var/const parts may be omitted). If we are looking for an address,
3059 find the cost of addressing this. */
3061 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3063 /* Otherwise estimate the costs for computing the expression. */
3064 aratio
= ratio
> 0 ? ratio
: -ratio
;
3065 if (!symbol_present
&& !var_present
&& !offset
)
3068 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3074 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3078 /* Symbol + offset should be compile-time computable. */
3079 && (symbol_present
|| offset
))
3082 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3086 /* Just get the expression, expand it and measure the cost. */
3087 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3093 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3095 return computation_cost (comp
);
3099 /* Determines the cost of the computation by that USE is expressed
3100 from induction variable CAND. If ADDRESS_P is true, we just need
3101 to create an address from it, otherwise we want to get it into
3102 register. A set of invariants we depend on is stored in
3106 get_computation_cost (struct ivopts_data
*data
,
3107 struct iv_use
*use
, struct iv_cand
*cand
,
3108 bool address_p
, bitmap
*depends_on
)
3110 return get_computation_cost_at (data
,
3111 use
, cand
, address_p
, depends_on
, use
->stmt
);
3114 /* Determines cost of basing replacement of USE on CAND in a generic
3118 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3119 struct iv_use
*use
, struct iv_cand
*cand
)
3124 /* The simple case first -- if we need to express value of the preserved
3125 original biv, the cost is 0. This also prevents us from counting the
3126 cost of increment twice -- once at this use and once in the cost of
3128 if (cand
->pos
== IP_ORIGINAL
3129 && cand
->incremented_at
== use
->stmt
)
3131 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3135 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3136 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3138 return cost
!= INFTY
;
3141 /* Determines cost of basing replacement of USE on CAND in an address. */
3144 determine_use_iv_cost_address (struct ivopts_data
*data
,
3145 struct iv_use
*use
, struct iv_cand
*cand
)
3148 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3150 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3152 return cost
!= INFTY
;
3155 /* Computes value of induction variable IV in iteration NITER. */
3158 iv_value (struct iv
*iv
, tree niter
)
3161 tree type
= TREE_TYPE (iv
->base
);
3163 niter
= fold_convert (type
, niter
);
3164 val
= fold (build2 (MULT_EXPR
, type
, iv
->step
, niter
));
3166 return fold (build2 (PLUS_EXPR
, type
, iv
->base
, val
));
3169 /* Computes value of candidate CAND at position AT in iteration NITER. */
3172 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3174 tree val
= iv_value (cand
->iv
, niter
);
3175 tree type
= TREE_TYPE (cand
->iv
->base
);
3177 if (stmt_after_increment (loop
, cand
, at
))
3178 val
= fold (build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
));
3183 /* Check whether it is possible to express the condition in USE by comparison
3184 of candidate CAND. If so, store the comparison code to COMPARE and the
3185 value compared with to BOUND. */
3188 may_eliminate_iv (struct loop
*loop
,
3189 struct iv_use
*use
, struct iv_cand
*cand
,
3190 enum tree_code
*compare
, tree
*bound
)
3194 struct tree_niter_desc niter
, new_niter
;
3195 tree wider_type
, type
, base
;
3197 /* For now works only for exits that dominate the loop latch. TODO -- extend
3198 for other conditions inside loop body. */
3199 ex_bb
= bb_for_stmt (use
->stmt
);
3200 if (use
->stmt
!= last_stmt (ex_bb
)
3201 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3203 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3206 exit
= EDGE_SUCC (ex_bb
, 0);
3207 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3208 exit
= EDGE_SUCC (ex_bb
, 1);
3209 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3212 niter
.niter
= NULL_TREE
;
3213 number_of_iterations_exit (loop
, exit
, &niter
);
3215 || !integer_nonzerop (niter
.assumptions
)
3216 || !integer_zerop (niter
.may_be_zero
))
3219 if (exit
->flags
& EDGE_TRUE_VALUE
)
3224 *bound
= cand_value_at (loop
, cand
, use
->stmt
, niter
.niter
);
3226 /* Let us check there is not some problem with overflows, by checking that
3227 the number of iterations is unchanged. */
3228 base
= cand
->iv
->base
;
3229 type
= TREE_TYPE (base
);
3230 if (stmt_after_increment (loop
, cand
, use
->stmt
))
3231 base
= fold (build2 (PLUS_EXPR
, type
, base
, cand
->iv
->step
));
3233 new_niter
.niter
= NULL_TREE
;
3234 number_of_iterations_cond (TREE_TYPE (cand
->iv
->base
), base
,
3235 cand
->iv
->step
, NE_EXPR
, *bound
, NULL_TREE
,
3237 if (!new_niter
.niter
3238 || !integer_nonzerop (new_niter
.assumptions
)
3239 || !integer_zerop (new_niter
.may_be_zero
))
3242 wider_type
= TREE_TYPE (new_niter
.niter
);
3243 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (TREE_TYPE (niter
.niter
)))
3244 wider_type
= TREE_TYPE (niter
.niter
);
3245 if (!operand_equal_p (fold_convert (wider_type
, niter
.niter
),
3246 fold_convert (wider_type
, new_niter
.niter
), 0))
3252 /* Determines cost of basing replacement of USE on CAND in a condition. */
3255 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3256 struct iv_use
*use
, struct iv_cand
*cand
)
3259 enum tree_code compare
;
3261 /* Only consider real candidates. */
3264 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3268 if (may_eliminate_iv (data
->current_loop
, use
, cand
, &compare
, &bound
))
3270 bitmap depends_on
= NULL
;
3271 unsigned cost
= force_var_cost (data
, bound
, &depends_on
);
3273 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3274 return cost
!= INFTY
;
3277 /* The induction variable elimination failed; just express the original
3278 giv. If it is compared with an invariant, note that we cannot get
3280 if (TREE_CODE (*use
->op_p
) == SSA_NAME
)
3281 record_invariant (data
, *use
->op_p
, true);
3284 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 0), true);
3285 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 1), true);
3288 return determine_use_iv_cost_generic (data
, use
, cand
);
3291 /* Checks whether it is possible to replace the final value of USE by
3292 a direct computation. If so, the formula is stored to *VALUE. */
3295 may_replace_final_value (struct loop
*loop
, struct iv_use
*use
, tree
*value
)
3298 struct tree_niter_desc
*niter
;
3300 exit
= single_dom_exit (loop
);
3304 gcc_assert (dominated_by_p (CDI_DOMINATORS
, exit
->src
,
3305 bb_for_stmt (use
->stmt
)));
3307 niter
= &loop_data (loop
)->niter
;
3309 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
3310 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
3313 *value
= iv_value (use
->iv
, niter
->niter
);
3318 /* Determines cost of replacing final value of USE using CAND. */
3321 determine_use_iv_cost_outer (struct ivopts_data
*data
,
3322 struct iv_use
*use
, struct iv_cand
*cand
)
3328 struct loop
*loop
= data
->current_loop
;
3330 /* The simple case first -- if we need to express value of the preserved
3331 original biv, the cost is 0. This also prevents us from counting the
3332 cost of increment twice -- once at this use and once in the cost of
3334 if (cand
->pos
== IP_ORIGINAL
3335 && cand
->incremented_at
== use
->stmt
)
3337 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3343 if (!may_replace_final_value (loop
, use
, &value
))
3345 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3350 cost
= force_var_cost (data
, value
, &depends_on
);
3352 cost
/= AVG_LOOP_NITER (loop
);
3354 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3355 return cost
!= INFTY
;
3358 exit
= single_dom_exit (loop
);
3361 /* If there is just a single exit, we may use value of the candidate
3362 after we take it to determine the value of use. */
3363 cost
= get_computation_cost_at (data
, use
, cand
, false, &depends_on
,
3364 last_stmt (exit
->src
));
3366 cost
/= AVG_LOOP_NITER (loop
);
3370 /* Otherwise we just need to compute the iv. */
3371 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3374 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3376 return cost
!= INFTY
;
3379 /* Determines cost of basing replacement of USE on CAND. Returns false
3380 if USE cannot be based on CAND. */
3383 determine_use_iv_cost (struct ivopts_data
*data
,
3384 struct iv_use
*use
, struct iv_cand
*cand
)
3388 case USE_NONLINEAR_EXPR
:
3389 return determine_use_iv_cost_generic (data
, use
, cand
);
3392 return determine_use_iv_cost_outer (data
, use
, cand
);
3395 return determine_use_iv_cost_address (data
, use
, cand
);
3398 return determine_use_iv_cost_condition (data
, use
, cand
);
3405 /* Determines costs of basing the use of the iv on an iv candidate. */
3408 determine_use_iv_costs (struct ivopts_data
*data
)
3412 struct iv_cand
*cand
;
3413 bitmap to_clear
= BITMAP_XMALLOC ();
3415 alloc_use_cost_map (data
);
3417 for (i
= 0; i
< n_iv_uses (data
); i
++)
3419 use
= iv_use (data
, i
);
3421 if (data
->consider_all_candidates
)
3423 for (j
= 0; j
< n_iv_cands (data
); j
++)
3425 cand
= iv_cand (data
, j
);
3426 determine_use_iv_cost (data
, use
, cand
);
3433 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
3435 cand
= iv_cand (data
, j
);
3436 if (!determine_use_iv_cost (data
, use
, cand
))
3437 bitmap_set_bit (to_clear
, j
);
3440 /* Remove the candidates for that the cost is infinite from
3441 the list of related candidates. */
3442 bitmap_and_compl_into (use
->related_cands
, to_clear
);
3443 bitmap_clear (to_clear
);
3447 BITMAP_XFREE (to_clear
);
3449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3451 fprintf (dump_file
, "Use-candidate costs:\n");
3453 for (i
= 0; i
< n_iv_uses (data
); i
++)
3455 use
= iv_use (data
, i
);
3457 fprintf (dump_file
, "Use %d:\n", i
);
3458 fprintf (dump_file
, " cand\tcost\tdepends on\n");
3459 for (j
= 0; j
< use
->n_map_members
; j
++)
3461 if (!use
->cost_map
[j
].cand
3462 || use
->cost_map
[j
].cost
== INFTY
)
3465 fprintf (dump_file
, " %d\t%d\t",
3466 use
->cost_map
[j
].cand
->id
,
3467 use
->cost_map
[j
].cost
);
3468 if (use
->cost_map
[j
].depends_on
)
3469 bitmap_print (dump_file
,
3470 use
->cost_map
[j
].depends_on
, "","");
3471 fprintf (dump_file
, "\n");
3474 fprintf (dump_file
, "\n");
3476 fprintf (dump_file
, "\n");
3480 /* Determines cost of the candidate CAND. */
3483 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
3485 unsigned cost_base
, cost_step
;
3495 /* There are two costs associated with the candidate -- its increment
3496 and its initialization. The second is almost negligible for any loop
3497 that rolls enough, so we take it just very little into account. */
3499 base
= cand
->iv
->base
;
3500 cost_base
= force_var_cost (data
, base
, NULL
);
3501 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
3503 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
3505 /* Prefer the original iv unless we may gain something by replacing it. */
3506 if (cand
->pos
== IP_ORIGINAL
)
3509 /* Prefer not to insert statements into latch unless there are some
3510 already (so that we do not create unnecessary jumps). */
3511 if (cand
->pos
== IP_END
)
3513 bb
= ip_end_pos (data
->current_loop
);
3514 last
= last_stmt (bb
);
3517 || TREE_CODE (last
) == LABEL_EXPR
)
3522 /* Determines costs of computation of the candidates. */
3525 determine_iv_costs (struct ivopts_data
*data
)
3529 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3531 fprintf (dump_file
, "Candidate costs:\n");
3532 fprintf (dump_file
, " cand\tcost\n");
3535 for (i
= 0; i
< n_iv_cands (data
); i
++)
3537 struct iv_cand
*cand
= iv_cand (data
, i
);
3539 determine_iv_cost (data
, cand
);
3541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3542 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
3545 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3546 fprintf (dump_file
, "\n");
3549 /* Calculates cost for having SIZE induction variables. */
3552 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
3554 return global_cost_for_size (size
,
3555 loop_data (data
->current_loop
)->regs_used
,
3559 /* For each size of the induction variable set determine the penalty. */
3562 determine_set_costs (struct ivopts_data
*data
)
3566 struct loop
*loop
= data
->current_loop
;
3569 /* We use the following model (definitely improvable, especially the
3570 cost function -- TODO):
3572 We estimate the number of registers available (using MD data), name it A.
3574 We estimate the number of registers used by the loop, name it U. This
3575 number is obtained as the number of loop phi nodes (not counting virtual
3576 registers and bivs) + the number of variables from outside of the loop.
3578 We set a reserve R (free regs that are used for temporary computations,
3579 etc.). For now the reserve is a constant 3.
3581 Let I be the number of induction variables.
3583 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3584 make a lot of ivs without a reason).
3585 -- if A - R < U + I <= A, the cost is I * PRES_COST
3586 -- if U + I > A, the cost is I * PRES_COST and
3587 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3591 fprintf (dump_file
, "Global costs:\n");
3592 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
3593 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
3594 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
3595 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
3599 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
3601 op
= PHI_RESULT (phi
);
3603 if (!is_gimple_reg (op
))
3606 if (get_iv (data
, op
))
3612 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
3614 struct version_info
*info
= ver_info (data
, j
);
3616 if (info
->inv_id
&& info
->has_nonlin_use
)
3620 loop_data (loop
)->regs_used
= n
;
3621 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3622 fprintf (dump_file
, " regs_used %d\n", n
);
3624 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3626 fprintf (dump_file
, " cost for size:\n");
3627 fprintf (dump_file
, " ivs\tcost\n");
3628 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
3629 fprintf (dump_file
, " %d\t%d\n", j
,
3630 ivopts_global_cost_for_size (data
, j
));
3631 fprintf (dump_file
, "\n");
3635 /* Returns true if A is a cheaper cost pair than B. */
3638 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
3646 if (a
->cost
< b
->cost
)
3649 if (a
->cost
> b
->cost
)
3652 /* In case the costs are the same, prefer the cheaper candidate. */
3653 if (a
->cand
->cost
< b
->cand
->cost
)
3659 /* Computes the cost field of IVS structure. */
3662 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
3666 cost
+= ivs
->cand_use_cost
;
3667 cost
+= ivs
->cand_cost
;
3668 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
3673 /* Set USE not to be expressed by any candidate in IVS. */
3676 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3679 unsigned uid
= use
->id
, cid
, iid
;
3681 struct cost_pair
*cp
;
3684 cp
= ivs
->cand_for_use
[uid
];
3690 ivs
->cand_for_use
[uid
] = NULL
;
3691 ivs
->n_cand_uses
[cid
]--;
3693 if (ivs
->n_cand_uses
[cid
] == 0)
3695 bitmap_clear_bit (ivs
->cands
, cid
);
3696 /* Do not count the pseudocandidates. */
3700 ivs
->cand_cost
-= cp
->cand
->cost
;
3703 ivs
->cand_use_cost
-= cp
->cost
;
3705 deps
= cp
->depends_on
;
3709 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3711 ivs
->n_invariant_uses
[iid
]--;
3712 if (ivs
->n_invariant_uses
[iid
] == 0)
3717 iv_ca_recount_cost (data
, ivs
);
3720 /* Set cost pair for USE in set IVS to CP. */
3723 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3724 struct iv_use
*use
, struct cost_pair
*cp
)
3726 unsigned uid
= use
->id
, cid
, iid
;
3730 if (ivs
->cand_for_use
[uid
] == cp
)
3733 if (ivs
->cand_for_use
[uid
])
3734 iv_ca_set_no_cp (data
, ivs
, use
);
3741 ivs
->cand_for_use
[uid
] = cp
;
3742 ivs
->n_cand_uses
[cid
]++;
3743 if (ivs
->n_cand_uses
[cid
] == 1)
3745 bitmap_set_bit (ivs
->cands
, cid
);
3746 /* Do not count the pseudocandidates. */
3750 ivs
->cand_cost
+= cp
->cand
->cost
;
3753 ivs
->cand_use_cost
+= cp
->cost
;
3755 deps
= cp
->depends_on
;
3759 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3761 ivs
->n_invariant_uses
[iid
]++;
3762 if (ivs
->n_invariant_uses
[iid
] == 1)
3767 iv_ca_recount_cost (data
, ivs
);
3771 /* Extend set IVS by expressing USE by some of the candidates in it
3775 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3778 struct cost_pair
*best_cp
= NULL
, *cp
;
3782 gcc_assert (ivs
->upto
>= use
->id
);
3784 if (ivs
->upto
== use
->id
)
3790 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
3792 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
3794 if (cheaper_cost_pair (cp
, best_cp
))
3798 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
3801 /* Get cost for assignment IVS. */
3804 iv_ca_cost (struct iv_ca
*ivs
)
3806 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
3809 /* Returns true if all dependences of CP are among invariants in IVS. */
3812 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
3817 if (!cp
->depends_on
)
3820 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
3822 if (ivs
->n_invariant_uses
[i
] == 0)
3829 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3830 it before NEXT_CHANGE. */
3832 static struct iv_ca_delta
*
3833 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
3834 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
3836 struct iv_ca_delta
*change
= xmalloc (sizeof (struct iv_ca_delta
));
3839 change
->old_cp
= old_cp
;
3840 change
->new_cp
= new_cp
;
3841 change
->next_change
= next_change
;
3846 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3849 static struct iv_ca_delta
*
3850 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
3852 struct iv_ca_delta
*last
;
3860 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
3862 last
->next_change
= l2
;
3867 /* Returns candidate by that USE is expressed in IVS. */
3869 static struct cost_pair
*
3870 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
3872 return ivs
->cand_for_use
[use
->id
];
3875 /* Reverse the list of changes DELTA, forming the inverse to it. */
3877 static struct iv_ca_delta
*
3878 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
3880 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
3881 struct cost_pair
*tmp
;
3883 for (act
= delta
; act
; act
= next
)
3885 next
= act
->next_change
;
3886 act
->next_change
= prev
;
3890 act
->old_cp
= act
->new_cp
;
3897 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3898 reverted instead. */
3901 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3902 struct iv_ca_delta
*delta
, bool forward
)
3904 struct cost_pair
*from
, *to
;
3905 struct iv_ca_delta
*act
;
3908 delta
= iv_ca_delta_reverse (delta
);
3910 for (act
= delta
; act
; act
= act
->next_change
)
3914 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
3915 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
3919 iv_ca_delta_reverse (delta
);
3922 /* Returns true if CAND is used in IVS. */
3925 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
3927 return ivs
->n_cand_uses
[cand
->id
] > 0;
3930 /* Returns number of induction variable candidates in the set IVS. */
3933 iv_ca_n_cands (struct iv_ca
*ivs
)
3935 return ivs
->n_cands
;
3938 /* Free the list of changes DELTA. */
3941 iv_ca_delta_free (struct iv_ca_delta
**delta
)
3943 struct iv_ca_delta
*act
, *next
;
3945 for (act
= *delta
; act
; act
= next
)
3947 next
= act
->next_change
;
3954 /* Allocates new iv candidates assignment. */
3956 static struct iv_ca
*
3957 iv_ca_new (struct ivopts_data
*data
)
3959 struct iv_ca
*nw
= xmalloc (sizeof (struct iv_ca
));
3963 nw
->cand_for_use
= xcalloc (n_iv_uses (data
), sizeof (struct cost_pair
*));
3964 nw
->n_cand_uses
= xcalloc (n_iv_cands (data
), sizeof (unsigned));
3965 nw
->cands
= BITMAP_XMALLOC ();
3968 nw
->cand_use_cost
= 0;
3970 nw
->n_invariant_uses
= xcalloc (data
->max_inv_id
+ 1, sizeof (unsigned));
3976 /* Free memory occupied by the set IVS. */
3979 iv_ca_free (struct iv_ca
**ivs
)
3981 free ((*ivs
)->cand_for_use
);
3982 free ((*ivs
)->n_cand_uses
);
3983 BITMAP_XFREE ((*ivs
)->cands
);
3984 free ((*ivs
)->n_invariant_uses
);
3989 /* Dumps IVS to FILE. */
3992 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
3994 const char *pref
= " invariants ";
3997 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
3998 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4000 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4001 if (ivs
->n_invariant_uses
[i
])
4003 fprintf (file
, "%s%d", pref
, i
);
4006 fprintf (file
, "\n");
4009 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4010 new set, and store differences in DELTA. Number of induction variables
4011 in the new set is stored to N_IVS. */
4014 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4015 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4020 struct cost_pair
*old_cp
, *new_cp
;
4023 for (i
= 0; i
< ivs
->upto
; i
++)
4025 use
= iv_use (data
, i
);
4026 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4029 && old_cp
->cand
== cand
)
4032 new_cp
= get_use_iv_cost (data
, use
, cand
);
4036 if (!iv_ca_has_deps (ivs
, new_cp
))
4039 if (!cheaper_cost_pair (new_cp
, old_cp
))
4042 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4045 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4046 cost
= iv_ca_cost (ivs
);
4048 *n_ivs
= iv_ca_n_cands (ivs
);
4049 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4054 /* Try narrowing set IVS by removing CAND. Return the cost of
4055 the new set and store the differences in DELTA. */
4058 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4059 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4063 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4065 struct iv_cand
*cnd
;
4069 for (i
= 0; i
< n_iv_uses (data
); i
++)
4071 use
= iv_use (data
, i
);
4073 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4074 if (old_cp
->cand
!= cand
)
4079 if (data
->consider_all_candidates
)
4081 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4086 cnd
= iv_cand (data
, ci
);
4088 cp
= get_use_iv_cost (data
, use
, cnd
);
4091 if (!iv_ca_has_deps (ivs
, cp
))
4094 if (!cheaper_cost_pair (cp
, new_cp
))
4102 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4107 cnd
= iv_cand (data
, ci
);
4109 cp
= get_use_iv_cost (data
, use
, cnd
);
4112 if (!iv_ca_has_deps (ivs
, cp
))
4115 if (!cheaper_cost_pair (cp
, new_cp
))
4124 iv_ca_delta_free (delta
);
4128 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4131 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4132 cost
= iv_ca_cost (ivs
);
4133 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4138 /* Try optimizing the set of candidates IVS by removing candidates different
4139 from to EXCEPT_CAND from it. Return cost of the new set, and store
4140 differences in DELTA. */
4143 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4144 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4147 struct iv_ca_delta
*act_delta
, *best_delta
;
4148 unsigned i
, best_cost
, acost
;
4149 struct iv_cand
*cand
;
4152 best_cost
= iv_ca_cost (ivs
);
4154 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4156 cand
= iv_cand (data
, i
);
4158 if (cand
== except_cand
)
4161 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4163 if (acost
< best_cost
)
4166 iv_ca_delta_free (&best_delta
);
4167 best_delta
= act_delta
;
4170 iv_ca_delta_free (&act_delta
);
4179 /* Recurse to possibly remove other unnecessary ivs. */
4180 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4181 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4182 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4183 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4187 /* Tries to extend the sets IVS in the best possible way in order
4188 to express the USE. */
4191 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4194 unsigned best_cost
, act_cost
;
4197 struct iv_cand
*cand
;
4198 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4199 struct cost_pair
*cp
;
4201 iv_ca_add_use (data
, ivs
, use
);
4202 best_cost
= iv_ca_cost (ivs
);
4204 cp
= iv_ca_cand_for_use (ivs
, use
);
4207 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4208 iv_ca_set_no_cp (data
, ivs
, use
);
4211 /* First try important candidates. Only if it fails, try the specific ones.
4212 Rationale -- in loops with many variables the best choice often is to use
4213 just one generic biv. If we added here many ivs specific to the uses,
4214 the optimization algorithm later would be likely to get stuck in a local
4215 minimum, thus causing us to create too many ivs. The approach from
4216 few ivs to more seems more likely to be successful -- starting from few
4217 ivs, replacing an expensive use by a specific iv should always be a
4219 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4221 cand
= iv_cand (data
, i
);
4223 if (iv_ca_cand_used_p (ivs
, cand
))
4226 cp
= get_use_iv_cost (data
, use
, cand
);
4230 iv_ca_set_cp (data
, ivs
, use
, cp
);
4231 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4232 iv_ca_set_no_cp (data
, ivs
, use
);
4233 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4235 if (act_cost
< best_cost
)
4237 best_cost
= act_cost
;
4239 iv_ca_delta_free (&best_delta
);
4240 best_delta
= act_delta
;
4243 iv_ca_delta_free (&act_delta
);
4246 if (best_cost
== INFTY
)
4248 for (i
= 0; i
< use
->n_map_members
; i
++)
4250 cp
= use
->cost_map
+ i
;
4255 /* Already tried this. */
4256 if (cand
->important
)
4259 if (iv_ca_cand_used_p (ivs
, cand
))
4263 iv_ca_set_cp (data
, ivs
, use
, cp
);
4264 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4265 iv_ca_set_no_cp (data
, ivs
, use
);
4266 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4269 if (act_cost
< best_cost
)
4271 best_cost
= act_cost
;
4274 iv_ca_delta_free (&best_delta
);
4275 best_delta
= act_delta
;
4278 iv_ca_delta_free (&act_delta
);
4282 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4283 iv_ca_delta_free (&best_delta
);
4285 return (best_cost
!= INFTY
);
4288 /* Finds an initial assignment of candidates to uses. */
4290 static struct iv_ca
*
4291 get_initial_solution (struct ivopts_data
*data
)
4293 struct iv_ca
*ivs
= iv_ca_new (data
);
4296 for (i
= 0; i
< n_iv_uses (data
); i
++)
4297 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4306 /* Tries to improve set of induction variables IVS. */
4309 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4311 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4312 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4313 struct iv_cand
*cand
;
4315 /* Try extending the set of induction variables by one. */
4316 for (i
= 0; i
< n_iv_cands (data
); i
++)
4318 cand
= iv_cand (data
, i
);
4320 if (iv_ca_cand_used_p (ivs
, cand
))
4323 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4327 /* If we successfully added the candidate and the set is small enough,
4328 try optimizing it by removing other candidates. */
4329 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
4331 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
4332 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
4333 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
4334 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
4337 if (acost
< best_cost
)
4340 iv_ca_delta_free (&best_delta
);
4341 best_delta
= act_delta
;
4344 iv_ca_delta_free (&act_delta
);
4349 /* Try removing the candidates from the set instead. */
4350 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
4352 /* Nothing more we can do. */
4357 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4358 gcc_assert (best_cost
== iv_ca_cost (ivs
));
4359 iv_ca_delta_free (&best_delta
);
4363 /* Attempts to find the optimal set of induction variables. We do simple
4364 greedy heuristic -- we try to replace at most one candidate in the selected
4365 solution and remove the unused ivs while this improves the cost. */
4367 static struct iv_ca
*
4368 find_optimal_iv_set (struct ivopts_data
*data
)
4374 /* Get the initial solution. */
4375 set
= get_initial_solution (data
);
4378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4379 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
4383 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4385 fprintf (dump_file
, "Initial set of candidates:\n");
4386 iv_ca_dump (data
, dump_file
, set
);
4389 while (try_improve_iv_set (data
, set
))
4391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4393 fprintf (dump_file
, "Improved to:\n");
4394 iv_ca_dump (data
, dump_file
, set
);
4398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4399 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
4401 for (i
= 0; i
< n_iv_uses (data
); i
++)
4403 use
= iv_use (data
, i
);
4404 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
4410 /* Creates a new induction variable corresponding to CAND. */
4413 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
4415 block_stmt_iterator incr_pos
;
4425 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
4429 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
4434 /* Mark that the iv is preserved. */
4435 name_info (data
, cand
->var_before
)->preserve_biv
= true;
4436 name_info (data
, cand
->var_after
)->preserve_biv
= true;
4438 /* Rewrite the increment so that it uses var_before directly. */
4439 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
4444 gimple_add_tmp_var (cand
->var_before
);
4445 add_referenced_tmp_var (cand
->var_before
);
4447 base
= unshare_expr (cand
->iv
->base
);
4449 create_iv (base
, cand
->iv
->step
, cand
->var_before
, data
->current_loop
,
4450 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
4453 /* Creates new induction variables described in SET. */
4456 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
4459 struct iv_cand
*cand
;
4462 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
4464 cand
= iv_cand (data
, i
);
4465 create_new_iv (data
, cand
);
4469 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4470 is true, remove also the ssa name defined by the statement. */
4473 remove_statement (tree stmt
, bool including_defined_name
)
4475 if (TREE_CODE (stmt
) == PHI_NODE
)
4477 if (!including_defined_name
)
4479 /* Prevent the ssa name defined by the statement from being removed. */
4480 SET_PHI_RESULT (stmt
, NULL
);
4482 remove_phi_node (stmt
, NULL_TREE
, bb_for_stmt (stmt
));
4486 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
4492 /* Rewrites USE (definition of iv used in a nonlinear expression)
4493 using candidate CAND. */
4496 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
4497 struct iv_use
*use
, struct iv_cand
*cand
)
4499 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4501 tree op
, stmts
, tgt
, ass
;
4502 block_stmt_iterator bsi
, pbsi
;
4504 switch (TREE_CODE (use
->stmt
))
4507 tgt
= PHI_RESULT (use
->stmt
);
4509 /* If we should keep the biv, do not replace it. */
4510 if (name_info (data
, tgt
)->preserve_biv
)
4513 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
4514 while (!bsi_end_p (pbsi
)
4515 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
4523 tgt
= TREE_OPERAND (use
->stmt
, 0);
4524 bsi
= bsi_for_stmt (use
->stmt
);
4531 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
4533 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
4536 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4537 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
4538 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
4539 remove_statement (use
->stmt
, false);
4540 SSA_NAME_DEF_STMT (tgt
) = ass
;
4545 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4546 TREE_OPERAND (use
->stmt
, 1) = op
;
4550 /* Replaces ssa name in index IDX by its basic variable. Callback for
4554 idx_remove_ssa_names (tree base
, tree
*idx
,
4555 void *data ATTRIBUTE_UNUSED
)
4559 if (TREE_CODE (*idx
) == SSA_NAME
)
4560 *idx
= SSA_NAME_VAR (*idx
);
4562 if (TREE_CODE (base
) == ARRAY_REF
)
4564 op
= &TREE_OPERAND (base
, 2);
4566 && TREE_CODE (*op
) == SSA_NAME
)
4567 *op
= SSA_NAME_VAR (*op
);
4568 op
= &TREE_OPERAND (base
, 3);
4570 && TREE_CODE (*op
) == SSA_NAME
)
4571 *op
= SSA_NAME_VAR (*op
);
4577 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4580 unshare_and_remove_ssa_names (tree ref
)
4582 ref
= unshare_expr (ref
);
4583 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
4588 /* Rewrites base of memory access OP with expression WITH in statement
4589 pointed to by BSI. */
4592 rewrite_address_base (block_stmt_iterator
*bsi
, tree
*op
, tree with
)
4594 tree bvar
, var
, new_var
, new_name
, copy
, name
;
4597 var
= bvar
= get_base_address (*op
);
4599 if (!var
|| TREE_CODE (with
) != SSA_NAME
)
4602 gcc_assert (TREE_CODE (var
) != ALIGN_INDIRECT_REF
);
4603 gcc_assert (TREE_CODE (var
) != MISALIGNED_INDIRECT_REF
);
4604 if (TREE_CODE (var
) == INDIRECT_REF
)
4605 var
= TREE_OPERAND (var
, 0);
4606 if (TREE_CODE (var
) == SSA_NAME
)
4609 var
= SSA_NAME_VAR (var
);
4611 else if (DECL_P (var
))
4616 if (var_ann (var
)->type_mem_tag
)
4617 var
= var_ann (var
)->type_mem_tag
;
4619 /* We need to add a memory tag for the variable. But we do not want
4620 to add it to the temporary used for the computations, since this leads
4621 to problems in redundancy elimination when there are common parts
4622 in two computations referring to the different arrays. So we copy
4623 the variable to a new temporary. */
4624 copy
= build2 (MODIFY_EXPR
, void_type_node
, NULL_TREE
, with
);
4626 new_name
= duplicate_ssa_name (name
, copy
);
4629 new_var
= create_tmp_var (TREE_TYPE (with
), "ruatmp");
4630 add_referenced_tmp_var (new_var
);
4631 var_ann (new_var
)->type_mem_tag
= var
;
4632 new_name
= make_ssa_name (new_var
, copy
);
4634 TREE_OPERAND (copy
, 0) = new_name
;
4635 bsi_insert_before (bsi
, copy
, BSI_SAME_STMT
);
4641 gcc_assert (TREE_CODE (*op
) != ALIGN_INDIRECT_REF
);
4642 gcc_assert (TREE_CODE (*op
) != MISALIGNED_INDIRECT_REF
);
4644 if (TREE_CODE (*op
) == INDIRECT_REF
)
4645 orig
= REF_ORIGINAL (*op
);
4647 orig
= unshare_and_remove_ssa_names (*op
);
4649 *op
= build1 (INDIRECT_REF
, TREE_TYPE (*op
), with
);
4651 /* Record the original reference, for purposes of alias analysis. */
4652 REF_ORIGINAL (*op
) = orig
;
4655 /* Rewrites USE (address that is an iv) using candidate CAND. */
4658 rewrite_use_address (struct ivopts_data
*data
,
4659 struct iv_use
*use
, struct iv_cand
*cand
)
4661 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4663 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4665 tree op
= force_gimple_operand (comp
, &stmts
, true, NULL_TREE
);
4668 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4670 rewrite_address_base (&bsi
, use
->op_p
, op
);
4673 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4677 rewrite_use_compare (struct ivopts_data
*data
,
4678 struct iv_use
*use
, struct iv_cand
*cand
)
4681 tree
*op_p
, cond
, op
, stmts
, bound
;
4682 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4683 enum tree_code compare
;
4685 if (may_eliminate_iv (data
->current_loop
,
4686 use
, cand
, &compare
, &bound
))
4688 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
4692 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4694 *use
->op_p
= build2 (compare
, boolean_type_node
,
4695 var_at_stmt (data
->current_loop
,
4696 cand
, use
->stmt
), op
);
4697 modify_stmt (use
->stmt
);
4701 /* The induction variable elimination failed; just express the original
4703 comp
= unshare_expr (get_computation (data
->current_loop
, use
, cand
));
4706 op_p
= &TREE_OPERAND (cond
, 0);
4707 if (TREE_CODE (*op_p
) != SSA_NAME
4708 || zero_p (get_iv (data
, *op_p
)->step
))
4709 op_p
= &TREE_OPERAND (cond
, 1);
4711 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
4713 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4718 /* Ensure that operand *OP_P may be used at the end of EXIT without
4719 violating loop closed ssa form. */
4722 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
4725 struct loop
*def_loop
;
4728 use
= USE_FROM_PTR (op_p
);
4729 if (TREE_CODE (use
) != SSA_NAME
)
4732 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
4736 def_loop
= def_bb
->loop_father
;
4737 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
4740 /* Try finding a phi node that copies the value out of the loop. */
4741 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4742 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
4747 /* Create such a phi node. */
4748 tree new_name
= duplicate_ssa_name (use
, NULL
);
4750 phi
= create_phi_node (new_name
, exit
->dest
);
4751 SSA_NAME_DEF_STMT (new_name
) = phi
;
4752 add_phi_arg (phi
, use
, exit
);
4755 SET_USE (op_p
, PHI_RESULT (phi
));
4758 /* Ensure that operands of STMT may be used at the end of EXIT without
4759 violating loop closed ssa form. */
4762 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
4766 v_may_def_optype v_may_defs
;
4769 get_stmt_operands (stmt
);
4771 uses
= STMT_USE_OPS (stmt
);
4772 for (i
= 0; i
< NUM_USES (uses
); i
++)
4773 protect_loop_closed_ssa_form_use (exit
, USE_OP_PTR (uses
, i
));
4775 vuses
= STMT_VUSE_OPS (stmt
);
4776 for (i
= 0; i
< NUM_VUSES (vuses
); i
++)
4777 protect_loop_closed_ssa_form_use (exit
, VUSE_OP_PTR (vuses
, i
));
4779 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
4780 for (i
= 0; i
< NUM_V_MAY_DEFS (v_may_defs
); i
++)
4781 protect_loop_closed_ssa_form_use (exit
, V_MAY_DEF_OP_PTR (v_may_defs
, i
));
4784 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4785 so that they are emitted on the correct place, and so that the loop closed
4786 ssa form is preserved. */
4789 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
4791 tree_stmt_iterator tsi
;
4792 block_stmt_iterator bsi
;
4793 tree phi
, stmt
, def
, next
;
4795 if (EDGE_COUNT (exit
->dest
->preds
) > 1)
4796 split_loop_exit_edge (exit
);
4798 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
4800 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
4801 protect_loop_closed_ssa_form (exit
, tsi_stmt (tsi
));
4804 protect_loop_closed_ssa_form (exit
, stmts
);
4806 /* Ensure there is label in exit->dest, so that we can
4808 tree_block_label (exit
->dest
);
4809 bsi
= bsi_after_labels (exit
->dest
);
4810 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4815 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
4817 next
= PHI_CHAIN (phi
);
4819 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
4821 def
= PHI_RESULT (phi
);
4822 remove_statement (phi
, false);
4823 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
4825 SSA_NAME_DEF_STMT (def
) = stmt
;
4826 bsi_insert_after (&bsi
, stmt
, BSI_CONTINUE_LINKING
);
4831 /* Rewrites the final value of USE (that is only needed outside of the loop)
4832 using candidate CAND. */
4835 rewrite_use_outer (struct ivopts_data
*data
,
4836 struct iv_use
*use
, struct iv_cand
*cand
)
4839 tree value
, op
, stmts
, tgt
;
4842 switch (TREE_CODE (use
->stmt
))
4845 tgt
= PHI_RESULT (use
->stmt
);
4848 tgt
= TREE_OPERAND (use
->stmt
, 0);
4854 exit
= single_dom_exit (data
->current_loop
);
4860 bool ok
= may_replace_final_value (data
->current_loop
, use
, &value
);
4864 value
= get_computation_at (data
->current_loop
,
4865 use
, cand
, last_stmt (exit
->src
));
4867 value
= unshare_expr (value
);
4868 op
= force_gimple_operand (value
, &stmts
, true, SSA_NAME_VAR (tgt
));
4870 /* If we will preserve the iv anyway and we would need to perform
4871 some computation to replace the final value, do nothing. */
4872 if (stmts
&& name_info (data
, tgt
)->preserve_biv
)
4875 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4877 use_operand_p use_p
= PHI_ARG_DEF_PTR_FROM_EDGE (phi
, exit
);
4879 if (USE_FROM_PTR (use_p
) == tgt
)
4880 SET_USE (use_p
, op
);
4884 compute_phi_arg_on_exit (exit
, stmts
, op
);
4886 /* Enable removal of the statement. We cannot remove it directly,
4887 since we may still need the aliasing information attached to the
4888 ssa name defined by it. */
4889 name_info (data
, tgt
)->iv
->have_use_for
= false;
4893 /* If the variable is going to be preserved anyway, there is nothing to
4895 if (name_info (data
, tgt
)->preserve_biv
)
4898 /* Otherwise we just need to compute the iv. */
4899 rewrite_use_nonlinear_expr (data
, use
, cand
);
4902 /* Rewrites USE using candidate CAND. */
4905 rewrite_use (struct ivopts_data
*data
,
4906 struct iv_use
*use
, struct iv_cand
*cand
)
4910 case USE_NONLINEAR_EXPR
:
4911 rewrite_use_nonlinear_expr (data
, use
, cand
);
4915 rewrite_use_outer (data
, use
, cand
);
4919 rewrite_use_address (data
, use
, cand
);
4923 rewrite_use_compare (data
, use
, cand
);
4929 modify_stmt (use
->stmt
);
4932 /* Rewrite the uses using the selected induction variables. */
4935 rewrite_uses (struct ivopts_data
*data
)
4938 struct iv_cand
*cand
;
4941 for (i
= 0; i
< n_iv_uses (data
); i
++)
4943 use
= iv_use (data
, i
);
4944 cand
= use
->selected
;
4947 rewrite_use (data
, use
, cand
);
4951 /* Removes the ivs that are not used after rewriting. */
4954 remove_unused_ivs (struct ivopts_data
*data
)
4959 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4961 struct version_info
*info
;
4963 info
= ver_info (data
, j
);
4965 && !zero_p (info
->iv
->step
)
4967 && !info
->iv
->have_use_for
4968 && !info
->preserve_biv
)
4969 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
4973 /* Frees data allocated by the optimization of a single loop. */
4976 free_loop_data (struct ivopts_data
*data
)
4981 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
4983 struct version_info
*info
;
4985 info
= ver_info (data
, i
);
4989 info
->has_nonlin_use
= false;
4990 info
->preserve_biv
= false;
4993 bitmap_clear (data
->relevant
);
4994 bitmap_clear (data
->important_candidates
);
4996 for (i
= 0; i
< n_iv_uses (data
); i
++)
4998 struct iv_use
*use
= iv_use (data
, i
);
5001 BITMAP_XFREE (use
->related_cands
);
5002 for (j
= 0; j
< use
->n_map_members
; j
++)
5003 if (use
->cost_map
[j
].depends_on
)
5004 BITMAP_XFREE (use
->cost_map
[j
].depends_on
);
5005 free (use
->cost_map
);
5008 VARRAY_POP_ALL (data
->iv_uses
);
5010 for (i
= 0; i
< n_iv_cands (data
); i
++)
5012 struct iv_cand
*cand
= iv_cand (data
, i
);
5018 VARRAY_POP_ALL (data
->iv_candidates
);
5020 if (data
->version_info_size
< num_ssa_names
)
5022 data
->version_info_size
= 2 * num_ssa_names
;
5023 free (data
->version_info
);
5024 data
->version_info
= xcalloc (data
->version_info_size
,
5025 sizeof (struct version_info
));
5028 data
->max_inv_id
= 0;
5030 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (decl_rtl_to_reset
); i
++)
5032 tree obj
= VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset
, i
);
5034 SET_DECL_RTL (obj
, NULL_RTX
);
5036 VARRAY_POP_ALL (decl_rtl_to_reset
);
5039 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5043 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
5047 for (i
= 1; i
< loops
->num
; i
++)
5048 if (loops
->parray
[i
])
5050 free (loops
->parray
[i
]->aux
);
5051 loops
->parray
[i
]->aux
= NULL
;
5054 free_loop_data (data
);
5055 free (data
->version_info
);
5056 BITMAP_XFREE (data
->relevant
);
5057 BITMAP_XFREE (data
->important_candidates
);
5059 VARRAY_FREE (decl_rtl_to_reset
);
5060 VARRAY_FREE (data
->iv_uses
);
5061 VARRAY_FREE (data
->iv_candidates
);
5064 /* Optimizes the LOOP. Returns true if anything changed. */
5067 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5069 bool changed
= false;
5070 struct iv_ca
*iv_ca
;
5073 data
->current_loop
= loop
;
5075 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5077 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5079 exit
= single_dom_exit (loop
);
5082 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5083 exit
->src
->index
, exit
->dest
->index
);
5084 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5085 fprintf (dump_file
, "\n");
5088 fprintf (dump_file
, "\n");
5091 /* For each ssa name determines whether it behaves as an induction variable
5093 if (!find_induction_variables (data
))
5096 /* Finds interesting uses (item 1). */
5097 find_interesting_uses (data
);
5098 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5101 /* Finds candidates for the induction variables (item 2). */
5102 find_iv_candidates (data
);
5104 /* Calculates the costs (item 3, part 1). */
5105 determine_use_iv_costs (data
);
5106 determine_iv_costs (data
);
5107 determine_set_costs (data
);
5109 /* Find the optimal set of induction variables (item 3, part 2). */
5110 iv_ca
= find_optimal_iv_set (data
);
5115 /* Create the new induction variables (item 4, part 1). */
5116 create_new_ivs (data
, iv_ca
);
5117 iv_ca_free (&iv_ca
);
5119 /* Rewrite the uses (item 4, part 2). */
5120 rewrite_uses (data
);
5122 /* Remove the ivs that are unused after rewriting. */
5123 remove_unused_ivs (data
);
5125 loop_commit_inserts ();
5127 /* We have changed the structure of induction variables; it might happen
5128 that definitions in the scev database refer to some of them that were
5133 free_loop_data (data
);
5138 /* Main entry point. Optimizes induction variables in LOOPS. */
5141 tree_ssa_iv_optimize (struct loops
*loops
)
5144 struct ivopts_data data
;
5146 tree_ssa_iv_optimize_init (loops
, &data
);
5148 /* Optimize the loops starting with the innermost ones. */
5149 loop
= loops
->tree_root
;
5153 #ifdef ENABLE_CHECKING
5154 verify_loop_closed_ssa ();
5158 /* Scan the loops, inner ones first. */
5159 while (loop
!= loops
->tree_root
)
5161 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5162 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5164 tree_ssa_iv_optimize_loop (&data
, loop
);
5176 #ifdef ENABLE_CHECKING
5177 verify_loop_closed_ssa ();
5181 tree_ssa_iv_optimize_finalize (loops
, &data
);