1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
103 tree base
; /* Initial value of the iv. */
104 tree base_object
; /* A memory object to that the induction variable points. */
105 tree step
; /* Step of the iv (constant only). */
106 tree ssa_name
; /* The ssa name with the value. */
107 bool biv_p
; /* Is it a biv? */
108 bool have_use_for
; /* Do we already have a use for it? */
109 unsigned use_id
; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
115 tree name
; /* The ssa name. */
116 struct iv
*iv
; /* Induction variable description. */
117 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id
; /* Id of an invariant. */
120 bool preserve_biv
; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
126 struct tree_niter_desc niter
;
127 /* Number of iterations. */
129 unsigned regs_used
; /* Number of registers used. */
135 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
136 USE_OUTER
, /* The induction variable is used outside the loop. */
137 USE_ADDRESS
, /* Use in an address. */
138 USE_COMPARE
/* Use is a compare. */
141 /* The candidate - cost pair. */
144 struct iv_cand
*cand
; /* The candidate. */
145 unsigned cost
; /* The cost. */
146 bitmap depends_on
; /* The list of invariants that have to be
153 unsigned id
; /* The id of the use. */
154 enum use_type type
; /* Type of the use. */
155 struct iv
*iv
; /* The induction variable it is based on. */
156 tree stmt
; /* Statement in that it occurs. */
157 tree
*op_p
; /* The place where it occurs. */
158 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
161 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
162 struct cost_pair
*cost_map
;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand
*selected
;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
172 IP_NORMAL
, /* At the end, just before the exit condition. */
173 IP_END
, /* At the end of the latch block. */
174 IP_ORIGINAL
/* The original biv. */
177 /* The induction variable candidate. */
180 unsigned id
; /* The number of the candidate. */
181 bool important
; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos
; /* Where it is computed. */
184 tree incremented_at
; /* For original biv, the statement where it is
186 tree var_before
; /* The variable used for it before increment. */
187 tree var_after
; /* The variable used for it after increment. */
188 struct iv
*iv
; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost
; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
199 /* The currently optimized loop. */
200 struct loop
*current_loop
;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size
;
205 /* The array of information for the ssa names. */
206 struct version_info
*version_info
;
208 /* The bitmap of indices in version_info whose value was changed. */
211 /* The maximum invariant id. */
214 /* The uses of induction variables. */
217 /* The candidates. */
218 varray_type iv_candidates
;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates
;
223 /* Whether to consider just related and important candidates when replacing a
225 bool consider_all_candidates
;
228 /* An assignment of iv candidates to uses. */
232 /* The number of uses covered by the assignment. */
235 /* Number of uses that cannot be expressed by the candidates in the set. */
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair
**cand_for_use
;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses
;
244 /* The candidates used. */
247 /* The number of candidates in the set. */
250 /* Total number of registers needed. */
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost
;
256 /* Total cost of candidates. */
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses
;
262 /* Total cost of the assignment. */
266 /* Difference of two iv candidate assignments. */
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair
*old_cp
;
276 /* A new assignment. */
277 struct cost_pair
*new_cp
;
279 /* Next change in the list. */
280 struct iv_ca_delta
*next_change
;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
303 static varray_type decl_rtl_to_reset
;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data
*data
)
310 return VARRAY_ACTIVE_SIZE (data
->iv_uses
);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use
*
316 iv_use (struct ivopts_data
*data
, unsigned i
)
318 return VARRAY_GENERIC_PTR_NOGC (data
->iv_uses
, i
);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data
*data
)
326 return VARRAY_ACTIVE_SIZE (data
->iv_candidates
);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand
*
332 iv_cand (struct ivopts_data
*data
, unsigned i
)
334 return VARRAY_GENERIC_PTR_NOGC (data
->iv_candidates
, i
);
337 /* The data for LOOP. */
339 static inline struct loop_data
*
340 loop_data (struct loop
*loop
)
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
348 single_dom_exit (struct loop
*loop
)
350 edge exit
= loop
->single_exit
;
355 if (!just_once_each_iteration_p (loop
, exit
->src
))
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv
*);
365 dump_iv (FILE *file
, struct iv
*iv
)
369 fprintf (file
, "ssa name ");
370 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
371 fprintf (file
, "\n");
374 fprintf (file
, " type ");
375 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
376 fprintf (file
, "\n");
380 fprintf (file
, " base ");
381 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
382 fprintf (file
, "\n");
384 fprintf (file
, " step ");
385 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
386 fprintf (file
, "\n");
390 fprintf (file
, " invariant ");
391 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
392 fprintf (file
, "\n");
397 fprintf (file
, " base object ");
398 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
399 fprintf (file
, "\n");
403 fprintf (file
, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use
*);
410 dump_use (FILE *file
, struct iv_use
*use
)
412 fprintf (file
, "use %d\n", use
->id
);
416 case USE_NONLINEAR_EXPR
:
417 fprintf (file
, " generic\n");
421 fprintf (file
, " outside\n");
425 fprintf (file
, " address\n");
429 fprintf (file
, " compare\n");
436 fprintf (file
, " in statement ");
437 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
438 fprintf (file
, "\n");
440 fprintf (file
, " at position ");
442 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
443 fprintf (file
, "\n");
445 dump_iv (file
, use
->iv
);
447 if (use
->related_cands
)
449 fprintf (file
, " related candidates ");
450 dump_bitmap (file
, use
->related_cands
);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data
*);
458 dump_uses (FILE *file
, struct ivopts_data
*data
)
463 for (i
= 0; i
< n_iv_uses (data
); i
++)
465 use
= iv_use (data
, i
);
467 dump_use (file
, use
);
468 fprintf (file
, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand
*);
476 dump_cand (FILE *file
, struct iv_cand
*cand
)
478 struct iv
*iv
= cand
->iv
;
480 fprintf (file
, "candidate %d%s\n",
481 cand
->id
, cand
->important
? " (important)" : "");
485 fprintf (file
, " final value replacement\n");
492 fprintf (file
, " incremented before exit test\n");
496 fprintf (file
, " incremented at end\n");
500 fprintf (file
, " original biv\n");
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info
*
510 ver_info (struct ivopts_data
*data
, unsigned ver
)
512 return data
->version_info
+ ver
;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info
*
518 name_info (struct ivopts_data
*data
, tree name
)
520 return ver_info (data
, SSA_NAME_VERSION (name
));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
527 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
530 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
531 unsigned HOST_WIDE_INT inv
, ex
, val
;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a
& 1) && !(b
& 1))
548 /* If b is still even, a is odd and there is no such x. */
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
556 for (i
= 0; i
< bits
- 1; i
++)
558 inv
= (inv
* ex
) & mask
;
559 ex
= (ex
* ex
) & mask
;
562 val
= (a
* inv
) & mask
;
564 gcc_assert (((val
* b
) & mask
) == a
);
566 if ((val
>> (bits
- 1)) & 1)
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
578 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
580 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
584 if (sbb
== loop
->latch
)
590 return stmt
== last_stmt (bb
);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
597 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
599 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
600 basic_block stmt_bb
= bb_for_stmt (stmt
);
601 block_stmt_iterator bsi
;
603 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
606 if (stmt_bb
!= cand_bb
)
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
613 if (bsi_stmt (bsi
) == cand
->incremented_at
)
615 if (bsi_stmt (bsi
) == stmt
)
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
624 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
632 return stmt_after_ip_normal_pos (loop
, stmt
);
635 return stmt_after_ip_original_pos (cand
, stmt
);
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
646 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
650 data
->version_info_size
= 2 * num_ssa_names
;
651 data
->version_info
= xcalloc (data
->version_info_size
,
652 sizeof (struct version_info
));
653 data
->relevant
= BITMAP_XMALLOC ();
654 data
->important_candidates
= BITMAP_XMALLOC ();
655 data
->max_inv_id
= 0;
657 for (i
= 1; i
< loops
->num
; i
++)
658 if (loops
->parray
[i
])
659 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_uses
, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data
->iv_candidates
, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset
, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
670 determine_base_object (tree expr
)
672 enum tree_code code
= TREE_CODE (expr
);
673 tree base
, obj
, op0
, op1
;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
684 obj
= TREE_OPERAND (expr
, 0);
685 base
= get_base_address (obj
);
688 return fold_convert (ptr_type_node
, expr
);
690 if (TREE_CODE (base
) == INDIRECT_REF
)
691 return fold_convert (ptr_type_node
, TREE_OPERAND (base
, 0));
693 return fold (build1 (ADDR_EXPR
, ptr_type_node
, base
));
697 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
698 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
704 return (code
== PLUS_EXPR
706 : fold (build1 (NEGATE_EXPR
, ptr_type_node
, op1
)));
708 return fold (build (code
, ptr_type_node
, op0
, op1
));
711 return fold_convert (ptr_type_node
, expr
);
715 /* Allocates an induction variable with given initial value BASE and step STEP
719 alloc_iv (tree base
, tree step
)
721 struct iv
*iv
= xcalloc (1, sizeof (struct iv
));
723 if (step
&& integer_zerop (step
))
727 iv
->base_object
= determine_base_object (base
);
730 iv
->have_use_for
= false;
732 iv
->ssa_name
= NULL_TREE
;
737 /* Sets STEP and BASE for induction variable IV. */
740 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
742 struct version_info
*info
= name_info (data
, iv
);
744 gcc_assert (!info
->iv
);
746 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
747 info
->iv
= alloc_iv (base
, step
);
748 info
->iv
->ssa_name
= iv
;
751 /* Finds induction variable declaration for VAR. */
754 get_iv (struct ivopts_data
*data
, tree var
)
758 if (!name_info (data
, var
)->iv
)
760 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
763 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
764 set_iv (data
, var
, var
, NULL_TREE
);
767 return name_info (data
, var
)->iv
;
770 /* Determines the step of a biv defined in PHI. */
773 determine_biv_step (tree phi
)
775 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
776 tree name
= PHI_RESULT (phi
), base
, step
;
777 tree type
= TREE_TYPE (name
);
779 if (!is_gimple_reg (name
))
782 if (!simple_iv (loop
, phi
, name
, &base
, &step
))
786 return build_int_cst (type
, 0);
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
794 abnormal_ssa_name_p (tree exp
)
799 if (TREE_CODE (exp
) != SSA_NAME
)
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
809 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
810 void *data ATTRIBUTE_UNUSED
)
812 if (TREE_CODE (base
) == ARRAY_REF
)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
816 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
820 return !abnormal_ssa_name_p (*index
);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
827 contains_abnormal_ssa_name_p (tree expr
)
829 enum tree_code code
= TREE_CODE (expr
);
830 enum tree_code_class
class = TREE_CODE_CLASS (code
);
832 if (code
== SSA_NAME
)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
835 if (code
== INTEGER_CST
836 || is_gimple_min_invariant (expr
))
839 if (code
== ADDR_EXPR
)
840 return !for_each_index (&TREE_OPERAND (expr
, 0),
841 idx_contains_abnormal_ssa_name_p
,
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
865 /* Finds basic ivs. */
868 find_bivs (struct ivopts_data
*data
)
870 tree phi
, step
, type
, base
;
872 struct loop
*loop
= data
->current_loop
;
874 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
879 step
= determine_biv_step (phi
);
883 if (cst_and_fits_in_hwi (step
)
884 && int_cst_value (step
) == 0)
887 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
888 if (contains_abnormal_ssa_name_p (base
))
891 type
= TREE_TYPE (PHI_RESULT (phi
));
892 base
= fold_convert (type
, base
);
893 step
= fold_convert (type
, step
);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step
))
900 set_iv (data
, PHI_RESULT (phi
), base
, step
);
907 /* Marks basic ivs. */
910 mark_bivs (struct ivopts_data
*data
)
913 struct iv
*iv
, *incr_iv
;
914 struct loop
*loop
= data
->current_loop
;
917 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
919 iv
= get_iv (data
, PHI_RESULT (phi
));
923 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
924 incr_iv
= get_iv (data
, var
);
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
930 if (incr_bb
->loop_father
!= data
->current_loop
931 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
935 incr_iv
->biv_p
= true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
943 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
,
944 tree
*base
, tree
*step
)
947 struct loop
*loop
= data
->current_loop
;
952 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
955 lhs
= TREE_OPERAND (stmt
, 0);
956 if (TREE_CODE (lhs
) != SSA_NAME
)
959 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), base
, step
))
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
965 && !cst_and_fits_in_hwi (*step
))
968 if (contains_abnormal_ssa_name_p (*base
))
974 /* Finds general ivs in statement STMT. */
977 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
981 if (!find_givs_in_stmt_scev (data
, stmt
, &base
, &step
))
984 set_iv (data
, TREE_OPERAND (stmt
, 0), base
, step
);
987 /* Finds general ivs in basic block BB. */
990 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
992 block_stmt_iterator bsi
;
994 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
995 find_givs_in_stmt (data
, bsi_stmt (bsi
));
998 /* Finds general ivs. */
1001 find_givs (struct ivopts_data
*data
)
1003 struct loop
*loop
= data
->current_loop
;
1004 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1007 for (i
= 0; i
< loop
->num_nodes
; i
++)
1008 find_givs_in_bb (data
, body
[i
]);
1012 /* Determine the number of iterations of the current loop. */
1015 determine_number_of_iterations (struct ivopts_data
*data
)
1017 struct loop
*loop
= data
->current_loop
;
1018 edge exit
= single_dom_exit (loop
);
1023 number_of_iterations_exit (loop
, exit
, &loop_data (loop
)->niter
);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1030 find_induction_variables (struct ivopts_data
*data
)
1033 struct loop
*loop
= data
->current_loop
;
1036 if (!find_bivs (data
))
1041 determine_number_of_iterations (data
);
1043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1045 if (loop_data (loop
)->niter
.niter
)
1047 fprintf (dump_file
, " number of iterations ");
1048 print_generic_expr (dump_file
, loop_data (loop
)->niter
.niter
,
1050 fprintf (dump_file
, "\n");
1052 fprintf (dump_file
, " may be zero if ");
1053 print_generic_expr (dump_file
, loop_data (loop
)->niter
.may_be_zero
,
1055 fprintf (dump_file
, "\n");
1057 fprintf (dump_file
, " bogus unless ");
1058 print_generic_expr (dump_file
, loop_data (loop
)->niter
.assumptions
,
1060 fprintf (dump_file
, "\n");
1061 fprintf (dump_file
, "\n");
1064 fprintf (dump_file
, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1068 if (ver_info (data
, i
)->iv
)
1069 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use
*
1079 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1080 tree stmt
, enum use_type use_type
)
1082 struct iv_use
*use
= xcalloc (1, sizeof (struct iv_use
));
1084 use
->id
= n_iv_uses (data
);
1085 use
->type
= use_type
;
1089 use
->related_cands
= BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1093 iv
->ssa_name
= NULL_TREE
;
1095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1096 dump_use (dump_file
, use
);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_uses
, use
);
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1108 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1111 struct version_info
*info
;
1113 if (TREE_CODE (op
) != SSA_NAME
1114 || !is_gimple_reg (op
))
1117 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1119 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1122 info
= name_info (data
, op
);
1124 info
->has_nonlin_use
|= nonlinear_use
;
1126 info
->inv_id
= ++data
->max_inv_id
;
1127 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1130 /* Checks whether the use OP is interesting and if so, records it
1133 static struct iv_use
*
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data
*data
, tree op
,
1142 if (TREE_CODE (op
) != SSA_NAME
)
1145 iv
= get_iv (data
, op
);
1149 if (iv
->have_use_for
)
1151 use
= iv_use (data
, iv
->use_id
);
1153 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
1154 || use
->type
== USE_OUTER
);
1156 if (type
== USE_NONLINEAR_EXPR
)
1157 use
->type
= USE_NONLINEAR_EXPR
;
1161 if (zero_p (iv
->step
))
1163 record_invariant (data
, op
, true);
1166 iv
->have_use_for
= true;
1168 civ
= xmalloc (sizeof (struct iv
));
1171 stmt
= SSA_NAME_DEF_STMT (op
);
1172 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1173 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1175 use
= record_use (data
, NULL
, civ
, stmt
, type
);
1176 iv
->use_id
= use
->id
;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use
*
1184 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1186 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_NONLINEAR_EXPR
);
1189 /* Records a definition of induction variable OP that is used outside of the
1192 static struct iv_use
*
1193 find_interesting_uses_outer (struct ivopts_data
*data
, tree op
)
1195 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_OUTER
);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1202 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1206 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1208 tree zero
= integer_zero_node
;
1210 const_iv
.step
= NULL_TREE
;
1212 if (integer_zerop (*cond_p
)
1213 || integer_nonzerop (*cond_p
))
1216 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1223 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1224 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1227 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1228 iv0
= get_iv (data
, *op0_p
);
1232 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1233 iv1
= get_iv (data
, *op1_p
);
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1244 find_interesting_uses_op (data
, *op0_p
);
1245 find_interesting_uses_op (data
, *op1_p
);
1249 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1251 /* If both are invariants, this is a work for unswitching. */
1255 civ
= xmalloc (sizeof (struct iv
));
1256 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1257 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1264 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1269 if (is_gimple_min_invariant (expr
))
1272 if (TREE_CODE (expr
) == SSA_NAME
)
1274 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1276 && flow_bb_inside_loop_p (loop
, def_bb
))
1285 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1286 for (i
= 0; i
< len
; i
++)
1287 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data
*ivopts_data
;
1305 idx_find_step (tree base
, tree
*idx
, void *data
)
1307 struct ifs_ivopts_data
*dta
= data
;
1309 tree step
, type
, iv_type
, iv_step
, lbound
, off
;
1310 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1312 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1316 /* If base is a component ref, require that the offset of the reference
1318 if (TREE_CODE (base
) == COMPONENT_REF
)
1320 off
= component_ref_field_offset (base
);
1321 return expr_invariant_in_loop_p (loop
, off
);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base
) == ARRAY_REF
)
1330 step
= array_ref_element_size (base
);
1331 lbound
= array_ref_low_bound (base
);
1333 if (!expr_invariant_in_loop_p (loop
, step
)
1334 || !expr_invariant_in_loop_p (loop
, lbound
))
1338 if (TREE_CODE (*idx
) != SSA_NAME
)
1341 iv
= get_iv (dta
->ivopts_data
, *idx
);
1350 iv_type
= TREE_TYPE (iv
->base
);
1351 type
= build_pointer_type (TREE_TYPE (base
));
1352 if (TREE_CODE (base
) == ARRAY_REF
)
1354 step
= array_ref_element_size (base
);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step
) != INTEGER_CST
)
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step
= build_int_cst (type
, 1);
1364 if (TYPE_PRECISION (iv_type
) < TYPE_PRECISION (type
))
1365 iv_step
= can_count_iv_in_wider_type (dta
->ivopts_data
->current_loop
,
1366 type
, iv
->base
, iv
->step
, dta
->stmt
);
1368 iv_step
= fold_convert (iv_type
, iv
->step
);
1372 /* The index might wrap. */
1376 step
= fold_binary_to_constant (MULT_EXPR
, type
, step
, iv_step
);
1379 *dta
->step_p
= step
;
1381 *dta
->step_p
= fold_binary_to_constant (PLUS_EXPR
, type
,
1382 *dta
->step_p
, step
);
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1391 idx_record_use (tree base
, tree
*idx
,
1394 find_interesting_uses_op (data
, *idx
);
1395 if (TREE_CODE (base
) == ARRAY_REF
)
1397 find_interesting_uses_op (data
, array_ref_element_size (base
));
1398 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1403 /* Returns true if memory reference REF may be unaligned. */
1406 may_be_unaligned_p (tree ref
)
1410 HOST_WIDE_INT bitsize
;
1411 HOST_WIDE_INT bitpos
;
1413 enum machine_mode mode
;
1414 int unsignedp
, volatilep
;
1415 unsigned base_align
;
1417 /* The test below is basically copy of what expr.c:normal_inner_ref
1418 does to check whether the object must be loaded by parts when
1419 STRICT_ALIGNMENT is true. */
1420 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1421 &unsignedp
, &volatilep
, true);
1422 base_type
= TREE_TYPE (base
);
1423 base_align
= TYPE_ALIGN (base_type
);
1426 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1427 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1428 || bitpos
% BITS_PER_UNIT
!= 0))
1434 /* Finds addresses in *OP_P inside STMT. */
1437 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1439 tree base
= unshare_expr (*op_p
), step
= NULL
;
1441 struct ifs_ivopts_data ifs_ivopts_data
;
1443 /* Ignore bitfields for now. Not really something terribly complicated
1445 if (TREE_CODE (base
) == COMPONENT_REF
1446 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1449 if (STRICT_ALIGNMENT
1450 && may_be_unaligned_p (base
))
1453 ifs_ivopts_data
.ivopts_data
= data
;
1454 ifs_ivopts_data
.stmt
= stmt
;
1455 ifs_ivopts_data
.step_p
= &step
;
1456 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1460 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1461 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1463 if (TREE_CODE (base
) == INDIRECT_REF
)
1464 base
= TREE_OPERAND (base
, 0);
1466 base
= build_addr (base
);
1468 civ
= alloc_iv (base
, step
);
1469 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1473 for_each_index (op_p
, idx_record_use
, data
);
1476 /* Finds and records invariants used in STMT. */
1479 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1481 use_optype uses
= NULL
;
1485 if (TREE_CODE (stmt
) == PHI_NODE
)
1486 n
= PHI_NUM_ARGS (stmt
);
1489 get_stmt_operands (stmt
);
1490 uses
= STMT_USE_OPS (stmt
);
1491 n
= NUM_USES (uses
);
1494 for (i
= 0; i
< n
; i
++)
1496 if (TREE_CODE (stmt
) == PHI_NODE
)
1497 op
= PHI_ARG_DEF (stmt
, i
);
1499 op
= USE_OP (uses
, i
);
1501 record_invariant (data
, op
, false);
1505 /* Finds interesting uses of induction variables in the statement STMT. */
1508 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1512 use_optype uses
= NULL
;
1515 find_invariants_stmt (data
, stmt
);
1517 if (TREE_CODE (stmt
) == COND_EXPR
)
1519 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1523 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1525 lhs
= TREE_OPERAND (stmt
, 0);
1526 rhs
= TREE_OPERAND (stmt
, 1);
1528 if (TREE_CODE (lhs
) == SSA_NAME
)
1530 /* If the statement defines an induction variable, the uses are not
1531 interesting by themselves. */
1533 iv
= get_iv (data
, lhs
);
1535 if (iv
&& !zero_p (iv
->step
))
1539 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1541 case tcc_comparison
:
1542 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1546 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1547 if (REFERENCE_CLASS_P (lhs
))
1548 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1554 if (REFERENCE_CLASS_P (lhs
)
1555 && is_gimple_val (rhs
))
1557 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1558 find_interesting_uses_op (data
, rhs
);
1562 /* TODO -- we should also handle address uses of type
1564 memory = call (whatever);
1571 if (TREE_CODE (stmt
) == PHI_NODE
1572 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1574 lhs
= PHI_RESULT (stmt
);
1575 iv
= get_iv (data
, lhs
);
1577 if (iv
&& !zero_p (iv
->step
))
1581 if (TREE_CODE (stmt
) == PHI_NODE
)
1582 n
= PHI_NUM_ARGS (stmt
);
1585 uses
= STMT_USE_OPS (stmt
);
1586 n
= NUM_USES (uses
);
1589 for (i
= 0; i
< n
; i
++)
1591 if (TREE_CODE (stmt
) == PHI_NODE
)
1592 op
= PHI_ARG_DEF (stmt
, i
);
1594 op
= USE_OP (uses
, i
);
1596 if (TREE_CODE (op
) != SSA_NAME
)
1599 iv
= get_iv (data
, op
);
1603 find_interesting_uses_op (data
, op
);
1607 /* Finds interesting uses of induction variables outside of loops
1608 on loop exit edge EXIT. */
1611 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1615 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1617 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1618 find_interesting_uses_outer (data
, def
);
1622 /* Finds uses of the induction variables that are interesting. */
1625 find_interesting_uses (struct ivopts_data
*data
)
1628 block_stmt_iterator bsi
;
1630 basic_block
*body
= get_loop_body (data
->current_loop
);
1632 struct version_info
*info
;
1635 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1636 fprintf (dump_file
, "Uses:\n\n");
1638 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1643 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1644 if (e
->dest
!= EXIT_BLOCK_PTR
1645 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1646 find_interesting_uses_outside (data
, e
);
1648 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1649 find_interesting_uses_stmt (data
, phi
);
1650 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1651 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1654 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1658 fprintf (dump_file
, "\n");
1660 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1662 info
= ver_info (data
, i
);
1665 fprintf (dump_file
, " ");
1666 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1667 fprintf (dump_file
, " is invariant (%d)%s\n",
1668 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1672 fprintf (dump_file
, "\n");
1678 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1679 position to POS. If USE is not NULL, the candidate is set as related to
1680 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1681 replacement of the final value of the iv by a direct computation. */
1683 static struct iv_cand
*
1684 add_candidate_1 (struct ivopts_data
*data
,
1685 tree base
, tree step
, bool important
, enum iv_position pos
,
1686 struct iv_use
*use
, tree incremented_at
)
1689 struct iv_cand
*cand
= NULL
;
1694 type
= TREE_TYPE (base
);
1695 if (!TYPE_UNSIGNED (type
))
1697 type
= unsigned_type_for (type
);
1698 base
= fold_convert (type
, base
);
1700 step
= fold_convert (type
, step
);
1704 for (i
= 0; i
< n_iv_cands (data
); i
++)
1706 cand
= iv_cand (data
, i
);
1708 if (cand
->pos
!= pos
)
1711 if (cand
->incremented_at
!= incremented_at
)
1725 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1728 if (zero_p (cand
->iv
->step
))
1735 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1740 if (i
== n_iv_cands (data
))
1742 cand
= xcalloc (1, sizeof (struct iv_cand
));
1748 cand
->iv
= alloc_iv (base
, step
);
1751 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1753 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
1754 cand
->var_after
= cand
->var_before
;
1756 cand
->important
= important
;
1757 cand
->incremented_at
= incremented_at
;
1758 VARRAY_PUSH_GENERIC_PTR_NOGC (data
->iv_candidates
, cand
);
1760 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1761 dump_cand (dump_file
, cand
);
1764 if (important
&& !cand
->important
)
1766 cand
->important
= true;
1767 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1768 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
1773 bitmap_set_bit (use
->related_cands
, i
);
1774 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1775 fprintf (dump_file
, "Candidate %d is related to use %d\n",
1782 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1783 position to POS. If USE is not NULL, the candidate is set as related to
1784 it. The candidate computation is scheduled on all available positions. */
1787 add_candidate (struct ivopts_data
*data
,
1788 tree base
, tree step
, bool important
, struct iv_use
*use
)
1790 if (ip_normal_pos (data
->current_loop
))
1791 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
1792 if (ip_end_pos (data
->current_loop
))
1793 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
1796 /* Adds standard iv candidates. */
1799 add_standard_iv_candidates (struct ivopts_data
*data
)
1801 /* Add 0 + 1 * iteration candidate. */
1802 add_candidate (data
,
1803 build_int_cst (unsigned_intSI_type_node
, 0),
1804 build_int_cst (unsigned_intSI_type_node
, 1),
1807 /* The same for a long type if it is still fast enough. */
1808 if (BITS_PER_WORD
> 32)
1809 add_candidate (data
,
1810 build_int_cst (unsigned_intDI_type_node
, 0),
1811 build_int_cst (unsigned_intDI_type_node
, 1),
1816 /* Adds candidates bases on the old induction variable IV. */
1819 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
1822 struct iv_cand
*cand
;
1824 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
1826 /* The same, but with initial value zero. */
1827 add_candidate (data
,
1828 build_int_cst (TREE_TYPE (iv
->base
), 0),
1829 iv
->step
, true, NULL
);
1831 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
1832 if (TREE_CODE (phi
) == PHI_NODE
)
1834 /* Additionally record the possibility of leaving the original iv
1836 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
1837 cand
= add_candidate_1 (data
,
1838 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
1839 SSA_NAME_DEF_STMT (def
));
1840 cand
->var_before
= iv
->ssa_name
;
1841 cand
->var_after
= def
;
1845 /* Adds candidates based on the old induction variables. */
1848 add_old_ivs_candidates (struct ivopts_data
*data
)
1854 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1856 iv
= ver_info (data
, i
)->iv
;
1857 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
1858 add_old_iv_candidates (data
, iv
);
1862 /* Adds candidates based on the value of the induction variable IV and USE. */
1865 add_iv_value_candidates (struct ivopts_data
*data
,
1866 struct iv
*iv
, struct iv_use
*use
)
1868 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
1870 /* The same, but with initial value zero. */
1871 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
1872 iv
->step
, false, use
);
1875 /* Adds candidates based on the address IV and USE. */
1878 add_address_candidates (struct ivopts_data
*data
,
1879 struct iv
*iv
, struct iv_use
*use
)
1881 tree base
, abase
, tmp
, *act
;
1883 /* First, the trivial choices. */
1884 add_iv_value_candidates (data
, iv
, use
);
1886 /* Second, try removing the COMPONENT_REFs. */
1887 if (TREE_CODE (iv
->base
) == ADDR_EXPR
)
1889 base
= TREE_OPERAND (iv
->base
, 0);
1890 while (TREE_CODE (base
) == COMPONENT_REF
1891 || (TREE_CODE (base
) == ARRAY_REF
1892 && TREE_CODE (TREE_OPERAND (base
, 1)) == INTEGER_CST
))
1893 base
= TREE_OPERAND (base
, 0);
1895 if (base
!= TREE_OPERAND (iv
->base
, 0))
1897 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1898 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1900 if (TREE_CODE (base
) == INDIRECT_REF
)
1901 base
= TREE_OPERAND (base
, 0);
1903 base
= build_addr (base
);
1904 add_candidate (data
, base
, iv
->step
, false, use
);
1908 /* Third, try removing the constant offset. */
1910 while (TREE_CODE (abase
) == PLUS_EXPR
1911 && TREE_CODE (TREE_OPERAND (abase
, 1)) != INTEGER_CST
)
1912 abase
= TREE_OPERAND (abase
, 0);
1913 /* We found the offset, so make the copy of the non-shared part and
1915 if (TREE_CODE (abase
) == PLUS_EXPR
)
1920 for (tmp
= iv
->base
; tmp
!= abase
; tmp
= TREE_OPERAND (tmp
, 0))
1922 *act
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
),
1923 NULL_TREE
, TREE_OPERAND (tmp
, 1));
1924 act
= &TREE_OPERAND (*act
, 0);
1926 *act
= TREE_OPERAND (tmp
, 0);
1928 add_candidate (data
, base
, iv
->step
, false, use
);
1932 /* Possibly adds pseudocandidate for replacing the final value of USE by
1933 a direct computation. */
1936 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
1938 struct tree_niter_desc
*niter
;
1939 struct loop
*loop
= data
->current_loop
;
1941 /* We must know where we exit the loop and how many times does it roll. */
1942 if (!single_dom_exit (loop
))
1945 niter
= &loop_data (loop
)->niter
;
1947 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
1948 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
1951 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
1954 /* Adds candidates based on the uses. */
1957 add_derived_ivs_candidates (struct ivopts_data
*data
)
1961 for (i
= 0; i
< n_iv_uses (data
); i
++)
1963 struct iv_use
*use
= iv_use (data
, i
);
1970 case USE_NONLINEAR_EXPR
:
1972 /* Just add the ivs based on the value of the iv used here. */
1973 add_iv_value_candidates (data
, use
->iv
, use
);
1977 add_iv_value_candidates (data
, use
->iv
, use
);
1979 /* Additionally, add the pseudocandidate for the possibility to
1980 replace the final value by a direct computation. */
1981 add_iv_outer_candidates (data
, use
);
1985 add_address_candidates (data
, use
->iv
, use
);
1994 /* Record important candidates and add them to related_cands bitmaps
1998 record_important_candidates (struct ivopts_data
*data
)
2003 for (i
= 0; i
< n_iv_cands (data
); i
++)
2005 struct iv_cand
*cand
= iv_cand (data
, i
);
2007 if (cand
->important
)
2008 bitmap_set_bit (data
->important_candidates
, i
);
2011 data
->consider_all_candidates
= (n_iv_cands (data
)
2012 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2014 if (data
->consider_all_candidates
)
2016 /* We will not need "related_cands" bitmaps in this case,
2017 so release them to decrease peak memory consumption. */
2018 for (i
= 0; i
< n_iv_uses (data
); i
++)
2020 use
= iv_use (data
, i
);
2021 BITMAP_XFREE (use
->related_cands
);
2026 /* Add important candidates to the related_cands bitmaps. */
2027 for (i
= 0; i
< n_iv_uses (data
); i
++)
2028 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2029 data
->important_candidates
);
2033 /* Finds the candidates for the induction variables. */
2036 find_iv_candidates (struct ivopts_data
*data
)
2038 /* Add commonly used ivs. */
2039 add_standard_iv_candidates (data
);
2041 /* Add old induction variables. */
2042 add_old_ivs_candidates (data
);
2044 /* Add induction variables derived from uses. */
2045 add_derived_ivs_candidates (data
);
2047 /* Record the important candidates. */
2048 record_important_candidates (data
);
2051 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2052 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2053 we allocate a simple list to every use. */
2056 alloc_use_cost_map (struct ivopts_data
*data
)
2058 unsigned i
, size
, s
, j
;
2060 for (i
= 0; i
< n_iv_uses (data
); i
++)
2062 struct iv_use
*use
= iv_use (data
, i
);
2065 if (data
->consider_all_candidates
)
2066 size
= n_iv_cands (data
);
2070 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2075 /* Round up to the power of two, so that moduling by it is fast. */
2076 for (size
= 1; size
< s
; size
<<= 1)
2080 use
->n_map_members
= size
;
2081 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2085 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2086 on invariants DEPENDS_ON. */
2089 set_use_iv_cost (struct ivopts_data
*data
,
2090 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2097 BITMAP_XFREE (depends_on
);
2101 if (data
->consider_all_candidates
)
2103 use
->cost_map
[cand
->id
].cand
= cand
;
2104 use
->cost_map
[cand
->id
].cost
= cost
;
2105 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2109 /* n_map_members is a power of two, so this computes modulo. */
2110 s
= cand
->id
& (use
->n_map_members
- 1);
2111 for (i
= s
; i
< use
->n_map_members
; i
++)
2112 if (!use
->cost_map
[i
].cand
)
2114 for (i
= 0; i
< s
; i
++)
2115 if (!use
->cost_map
[i
].cand
)
2121 use
->cost_map
[i
].cand
= cand
;
2122 use
->cost_map
[i
].cost
= cost
;
2123 use
->cost_map
[i
].depends_on
= depends_on
;
2126 /* Gets cost of (USE, CANDIDATE) pair. */
2128 static struct cost_pair
*
2129 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2130 struct iv_cand
*cand
)
2133 struct cost_pair
*ret
;
2138 if (data
->consider_all_candidates
)
2140 ret
= use
->cost_map
+ cand
->id
;
2147 /* n_map_members is a power of two, so this computes modulo. */
2148 s
= cand
->id
& (use
->n_map_members
- 1);
2149 for (i
= s
; i
< use
->n_map_members
; i
++)
2150 if (use
->cost_map
[i
].cand
== cand
)
2151 return use
->cost_map
+ i
;
2153 for (i
= 0; i
< s
; i
++)
2154 if (use
->cost_map
[i
].cand
== cand
)
2155 return use
->cost_map
+ i
;
2160 /* Returns estimate on cost of computing SEQ. */
2168 for (; seq
; seq
= NEXT_INSN (seq
))
2170 set
= single_set (seq
);
2172 cost
+= rtx_cost (set
, SET
);
2180 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2182 produce_memory_decl_rtl (tree obj
, int *regno
)
2187 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2189 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2190 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2193 x
= gen_raw_REG (Pmode
, (*regno
)++);
2195 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2198 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2199 walk_tree. DATA contains the actual fake register number. */
2202 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2204 tree obj
= NULL_TREE
;
2208 switch (TREE_CODE (*expr_p
))
2211 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2212 handled_component_p (*expr_p
);
2213 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2217 x
= produce_memory_decl_rtl (obj
, regno
);
2222 obj
= SSA_NAME_VAR (*expr_p
);
2223 if (!DECL_RTL_SET_P (obj
))
2224 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2233 if (DECL_RTL_SET_P (obj
))
2236 if (DECL_MODE (obj
) == BLKmode
)
2237 x
= produce_memory_decl_rtl (obj
, regno
);
2239 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2249 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset
, obj
);
2250 SET_DECL_RTL (obj
, x
);
2256 /* Determines cost of the computation of EXPR. */
2259 computation_cost (tree expr
)
2262 tree type
= TREE_TYPE (expr
);
2266 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2268 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2272 cost
= seq_cost (seq
);
2273 if (GET_CODE (rslt
) == MEM
)
2274 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2279 /* Returns variable containing the value of candidate CAND at statement AT. */
2282 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2284 if (stmt_after_increment (loop
, cand
, stmt
))
2285 return cand
->var_after
;
2287 return cand
->var_before
;
2290 /* Determines the expression by that USE is expressed from induction variable
2291 CAND at statement AT in LOOP. */
2294 get_computation_at (struct loop
*loop
,
2295 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
2297 tree ubase
= use
->iv
->base
;
2298 tree ustep
= use
->iv
->step
;
2299 tree cbase
= cand
->iv
->base
;
2300 tree cstep
= cand
->iv
->step
;
2301 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2305 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2306 HOST_WIDE_INT ratioi
;
2308 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2310 /* We do not have a precision to express the values of use. */
2314 expr
= var_at_stmt (loop
, cand
, at
);
2316 if (TREE_TYPE (expr
) != ctype
)
2318 /* This may happen with the original ivs. */
2319 expr
= fold_convert (ctype
, expr
);
2322 if (TYPE_UNSIGNED (utype
))
2326 uutype
= unsigned_type_for (utype
);
2327 ubase
= fold_convert (uutype
, ubase
);
2328 ustep
= fold_convert (uutype
, ustep
);
2331 if (uutype
!= ctype
)
2333 expr
= fold_convert (uutype
, expr
);
2334 cbase
= fold_convert (uutype
, cbase
);
2335 cstep
= fold_convert (uutype
, cstep
);
2338 if (!cst_and_fits_in_hwi (cstep
)
2339 || !cst_and_fits_in_hwi (ustep
))
2342 ustepi
= int_cst_value (ustep
);
2343 cstepi
= int_cst_value (cstep
);
2345 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2347 /* TODO maybe consider case when ustep divides cstep and the ratio is
2348 a power of 2 (so that the division is fast to execute)? We would
2349 need to be much more careful with overflows etc. then. */
2353 /* We may need to shift the value if we are after the increment. */
2354 if (stmt_after_increment (loop
, cand
, at
))
2355 cbase
= fold (build2 (PLUS_EXPR
, uutype
, cbase
, cstep
));
2357 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2358 or |ratio| == 1, it is better to handle this like
2360 ubase - ratio * cbase + ratio * var. */
2364 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, cbase
));
2365 expr
= fold (build2 (PLUS_EXPR
, uutype
, expr
, delta
));
2367 else if (ratioi
== -1)
2369 delta
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, cbase
));
2370 expr
= fold (build2 (MINUS_EXPR
, uutype
, delta
, expr
));
2372 else if (TREE_CODE (cbase
) == INTEGER_CST
)
2374 ratio
= build_int_cst_type (uutype
, ratioi
);
2375 delta
= fold (build2 (MULT_EXPR
, uutype
, ratio
, cbase
));
2376 delta
= fold (build2 (MINUS_EXPR
, uutype
, ubase
, delta
));
2377 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2378 expr
= fold (build2 (PLUS_EXPR
, uutype
, delta
, expr
));
2382 expr
= fold (build2 (MINUS_EXPR
, uutype
, expr
, cbase
));
2383 ratio
= build_int_cst_type (uutype
, ratioi
);
2384 expr
= fold (build2 (MULT_EXPR
, uutype
, ratio
, expr
));
2385 expr
= fold (build2 (PLUS_EXPR
, uutype
, ubase
, expr
));
2388 return fold_convert (utype
, expr
);
2391 /* Determines the expression by that USE is expressed from induction variable
2395 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2397 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2400 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2403 strip_offset (tree
*expr
, unsigned HOST_WIDE_INT
*offset
)
2406 enum tree_code code
;
2410 if (cst_and_fits_in_hwi (*expr
))
2412 *offset
+= int_cst_value (*expr
);
2413 *expr
= integer_zero_node
;
2417 code
= TREE_CODE (*expr
);
2419 if (code
!= PLUS_EXPR
&& code
!= MINUS_EXPR
)
2422 op0
= TREE_OPERAND (*expr
, 0);
2423 op1
= TREE_OPERAND (*expr
, 1);
2425 if (cst_and_fits_in_hwi (op1
))
2427 if (code
== PLUS_EXPR
)
2428 *offset
+= int_cst_value (op1
);
2430 *offset
-= int_cst_value (op1
);
2436 if (code
!= PLUS_EXPR
)
2439 if (!cst_and_fits_in_hwi (op0
))
2442 *offset
+= int_cst_value (op0
);
2447 /* Returns cost of addition in MODE. */
2450 add_cost (enum machine_mode mode
)
2452 static unsigned costs
[NUM_MACHINE_MODES
];
2460 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2461 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
),
2462 gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
+ 1)),
2467 cost
= seq_cost (seq
);
2473 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2474 fprintf (dump_file
, "Addition in %s costs %d\n",
2475 GET_MODE_NAME (mode
), cost
);
2479 /* Entry in a hashtable of already known costs for multiplication. */
2482 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2483 enum machine_mode mode
; /* In mode. */
2484 unsigned cost
; /* The cost. */
2487 /* Counts hash value for the ENTRY. */
2490 mbc_entry_hash (const void *entry
)
2492 const struct mbc_entry
*e
= entry
;
2494 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2497 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2500 mbc_entry_eq (const void *entry1
, const void *entry2
)
2502 const struct mbc_entry
*e1
= entry1
;
2503 const struct mbc_entry
*e2
= entry2
;
2505 return (e1
->mode
== e2
->mode
2506 && e1
->cst
== e2
->cst
);
2509 /* Returns cost of multiplication by constant CST in MODE. */
2512 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
2514 static htab_t costs
;
2515 struct mbc_entry
**cached
, act
;
2520 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2524 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2526 return (*cached
)->cost
;
2528 *cached
= xmalloc (sizeof (struct mbc_entry
));
2529 (*cached
)->mode
= mode
;
2530 (*cached
)->cst
= cst
;
2533 expand_mult (mode
, gen_raw_REG (mode
, FIRST_PSEUDO_REGISTER
), GEN_INT (cst
),
2538 cost
= seq_cost (seq
);
2540 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2541 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2542 (int) cst
, GET_MODE_NAME (mode
), cost
);
2544 (*cached
)->cost
= cost
;
2549 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2550 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2551 variable is omitted. The created memory accesses MODE.
2553 TODO -- there must be some better way. This all is quite crude. */
2556 get_address_cost (bool symbol_present
, bool var_present
,
2557 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
2559 #define MAX_RATIO 128
2560 static sbitmap valid_mult
;
2561 static HOST_WIDE_INT rat
, off
;
2562 static HOST_WIDE_INT min_offset
, max_offset
;
2563 static unsigned costs
[2][2][2][2];
2564 unsigned cost
, acost
;
2565 rtx seq
, addr
, base
;
2566 bool offset_p
, ratio_p
;
2568 HOST_WIDE_INT s_offset
;
2569 unsigned HOST_WIDE_INT mask
;
2576 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2578 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
2579 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2581 XEXP (addr
, 1) = GEN_INT (i
);
2582 if (!memory_address_p (Pmode
, addr
))
2585 max_offset
= i
>> 1;
2588 for (i
= 1; i
<= 1 << 20; i
<<= 1)
2590 XEXP (addr
, 1) = GEN_INT (-i
);
2591 if (!memory_address_p (Pmode
, addr
))
2594 min_offset
= -(i
>> 1);
2596 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2598 fprintf (dump_file
, "get_address_cost:\n");
2599 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
2600 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
2603 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
2604 sbitmap_zero (valid_mult
);
2606 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2607 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2609 XEXP (addr
, 1) = GEN_INT (i
);
2610 if (memory_address_p (Pmode
, addr
))
2612 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
2617 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2619 fprintf (dump_file
, " allowed multipliers:");
2620 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2621 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
2622 fprintf (dump_file
, " %d", (int) i
);
2623 fprintf (dump_file
, "\n");
2624 fprintf (dump_file
, "\n");
2628 bits
= GET_MODE_BITSIZE (Pmode
);
2629 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
2631 if ((offset
>> (bits
- 1) & 1))
2636 offset_p
= (s_offset
!= 0
2637 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
2638 ratio_p
= (ratio
!= 1
2639 && -MAX_RATIO
<= ratio
&& ratio
<= MAX_RATIO
2640 && TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
));
2642 if (ratio
!= 1 && !ratio_p
)
2643 cost
+= multiply_by_cost (ratio
, Pmode
);
2645 if (s_offset
&& !offset_p
&& !symbol_present
)
2647 cost
+= add_cost (Pmode
);
2651 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
2656 addr
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
);
2657 reg1
= gen_raw_REG (Pmode
, FIRST_PSEUDO_REGISTER
+ 1);
2659 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, GEN_INT (rat
));
2662 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
2666 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
2668 base
= gen_rtx_fmt_e (CONST
, Pmode
,
2669 gen_rtx_fmt_ee (PLUS
, Pmode
,
2674 base
= GEN_INT (off
);
2679 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
2682 addr
= memory_address (Pmode
, addr
);
2686 acost
= seq_cost (seq
);
2687 acost
+= address_cost (addr
, Pmode
);
2691 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
2694 return cost
+ acost
;
2697 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2698 the bitmap to that we should store it. */
2700 static struct ivopts_data
*fd_ivopts_data
;
2702 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2704 bitmap
*depends_on
= data
;
2705 struct version_info
*info
;
2707 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2709 info
= name_info (fd_ivopts_data
, *expr_p
);
2711 if (!info
->inv_id
|| info
->has_nonlin_use
)
2715 *depends_on
= BITMAP_XMALLOC ();
2716 bitmap_set_bit (*depends_on
, info
->inv_id
);
2721 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2722 invariants the computation depends on. */
2725 force_var_cost (struct ivopts_data
*data
,
2726 tree expr
, bitmap
*depends_on
)
2728 static bool costs_initialized
= false;
2729 static unsigned integer_cost
;
2730 static unsigned symbol_cost
;
2731 static unsigned address_cost
;
2733 unsigned cost0
, cost1
, cost
;
2734 enum machine_mode mode
;
2736 if (!costs_initialized
)
2738 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
2739 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
2740 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
2742 tree type
= build_pointer_type (integer_type_node
);
2744 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
2747 SET_DECL_RTL (var
, x
);
2748 TREE_STATIC (var
) = 1;
2749 addr
= build1 (ADDR_EXPR
, type
, var
);
2750 symbol_cost
= computation_cost (addr
) + 1;
2753 = computation_cost (build2 (PLUS_EXPR
, type
,
2755 build_int_cst_type (type
, 2000))) + 1;
2756 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2758 fprintf (dump_file
, "force_var_cost:\n");
2759 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
2760 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
2761 fprintf (dump_file
, " address %d\n", (int) address_cost
);
2762 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
2763 fprintf (dump_file
, "\n");
2766 costs_initialized
= true;
2771 fd_ivopts_data
= data
;
2772 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
2775 if (SSA_VAR_P (expr
))
2778 if (TREE_INVARIANT (expr
))
2780 if (TREE_CODE (expr
) == INTEGER_CST
)
2781 return integer_cost
;
2783 if (TREE_CODE (expr
) == ADDR_EXPR
)
2785 tree obj
= TREE_OPERAND (expr
, 0);
2787 if (TREE_CODE (obj
) == VAR_DECL
2788 || TREE_CODE (obj
) == PARM_DECL
2789 || TREE_CODE (obj
) == RESULT_DECL
)
2793 return address_cost
;
2796 switch (TREE_CODE (expr
))
2801 op0
= TREE_OPERAND (expr
, 0);
2802 op1
= TREE_OPERAND (expr
, 1);
2804 if (is_gimple_val (op0
))
2807 cost0
= force_var_cost (data
, op0
, NULL
);
2809 if (is_gimple_val (op1
))
2812 cost1
= force_var_cost (data
, op1
, NULL
);
2817 /* Just an arbitrary value, FIXME. */
2818 return target_spill_cost
;
2821 mode
= TYPE_MODE (TREE_TYPE (expr
));
2822 switch (TREE_CODE (expr
))
2826 cost
= add_cost (mode
);
2830 if (cst_and_fits_in_hwi (op0
))
2831 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
2832 else if (cst_and_fits_in_hwi (op1
))
2833 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
2835 return target_spill_cost
;
2845 /* Bound the cost by target_spill_cost. The parts of complicated
2846 computations often are either loop invariant or at least can
2847 be shared between several iv uses, so letting this grow without
2848 limits would not give reasonable results. */
2849 return cost
< target_spill_cost
? cost
: target_spill_cost
;
2852 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2853 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2854 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2855 invariants the computation depends on. */
2858 split_address_cost (struct ivopts_data
*data
,
2859 tree addr
, bool *symbol_present
, bool *var_present
,
2860 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2863 HOST_WIDE_INT bitsize
;
2864 HOST_WIDE_INT bitpos
;
2866 enum machine_mode mode
;
2867 int unsignedp
, volatilep
;
2869 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
2870 &unsignedp
, &volatilep
, false);
2873 || bitpos
% BITS_PER_UNIT
!= 0
2874 || TREE_CODE (core
) != VAR_DECL
)
2876 *symbol_present
= false;
2877 *var_present
= true;
2878 fd_ivopts_data
= data
;
2879 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
2880 return target_spill_cost
;
2883 *offset
+= bitpos
/ BITS_PER_UNIT
;
2884 if (TREE_STATIC (core
)
2885 || DECL_EXTERNAL (core
))
2887 *symbol_present
= true;
2888 *var_present
= false;
2892 *symbol_present
= false;
2893 *var_present
= true;
2897 /* Estimates cost of expressing difference of addresses E1 - E2 as
2898 var + symbol + offset. The value of offset is added to OFFSET,
2899 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2900 part is missing. DEPENDS_ON is a set of the invariants the computation
2904 ptr_difference_cost (struct ivopts_data
*data
,
2905 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2906 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2908 HOST_WIDE_INT diff
= 0;
2911 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
2913 if (ptr_difference_const (e1
, e2
, &diff
))
2916 *symbol_present
= false;
2917 *var_present
= false;
2921 if (e2
== integer_zero_node
)
2922 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
2923 symbol_present
, var_present
, offset
, depends_on
);
2925 *symbol_present
= false;
2926 *var_present
= true;
2928 cost
= force_var_cost (data
, e1
, depends_on
);
2929 cost
+= force_var_cost (data
, e2
, depends_on
);
2930 cost
+= add_cost (Pmode
);
2935 /* Estimates cost of expressing difference E1 - E2 as
2936 var + symbol + offset. The value of offset is added to OFFSET,
2937 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2938 part is missing. DEPENDS_ON is a set of the invariants the computation
2942 difference_cost (struct ivopts_data
*data
,
2943 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
2944 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
2947 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
2949 strip_offset (&e1
, offset
);
2951 strip_offset (&e2
, offset
);
2954 if (TREE_CODE (e1
) == ADDR_EXPR
)
2955 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
2957 *symbol_present
= false;
2959 if (operand_equal_p (e1
, e2
, 0))
2961 *var_present
= false;
2964 *var_present
= true;
2966 return force_var_cost (data
, e1
, depends_on
);
2970 cost
= force_var_cost (data
, e2
, depends_on
);
2971 cost
+= multiply_by_cost (-1, mode
);
2976 cost
= force_var_cost (data
, e1
, depends_on
);
2977 cost
+= force_var_cost (data
, e2
, depends_on
);
2978 cost
+= add_cost (mode
);
2983 /* Determines the cost of the computation by that USE is expressed
2984 from induction variable CAND. If ADDRESS_P is true, we just need
2985 to create an address from it, otherwise we want to get it into
2986 register. A set of invariants we depend on is stored in
2987 DEPENDS_ON. AT is the statement at that the value is computed. */
2990 get_computation_cost_at (struct ivopts_data
*data
,
2991 struct iv_use
*use
, struct iv_cand
*cand
,
2992 bool address_p
, bitmap
*depends_on
, tree at
)
2994 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
2996 tree utype
= TREE_TYPE (ubase
), ctype
;
2997 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
2998 HOST_WIDE_INT ratio
, aratio
;
2999 bool var_present
, symbol_present
;
3000 unsigned cost
= 0, n_sums
;
3004 /* Only consider real candidates. */
3008 cbase
= cand
->iv
->base
;
3009 cstep
= cand
->iv
->step
;
3010 ctype
= TREE_TYPE (cbase
);
3012 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3014 /* We do not have a precision to express the values of use. */
3020 /* Do not try to express address of an object with computation based
3021 on address of a different object. This may cause problems in rtl
3022 level alias analysis (that does not expect this to be happening,
3023 as this is illegal in C), and would be unlikely to be useful
3025 if (use
->iv
->base_object
3026 && cand
->iv
->base_object
3027 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3031 if (!cst_and_fits_in_hwi (ustep
)
3032 || !cst_and_fits_in_hwi (cstep
))
3035 if (TREE_CODE (ubase
) == INTEGER_CST
3036 && !cst_and_fits_in_hwi (ubase
))
3039 if (TREE_CODE (cbase
) == INTEGER_CST
3040 && !cst_and_fits_in_hwi (cbase
))
3043 ustepi
= int_cst_value (ustep
);
3044 cstepi
= int_cst_value (cstep
);
3046 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3048 /* TODO -- add direct handling of this case. */
3052 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3055 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3056 or ratio == 1, it is better to handle this like
3058 ubase - ratio * cbase + ratio * var
3060 (also holds in the case ratio == -1, TODO. */
3062 if (TREE_CODE (cbase
) == INTEGER_CST
)
3064 offset
= - ratio
* int_cst_value (cbase
);
3065 cost
+= difference_cost (data
,
3066 ubase
, integer_zero_node
,
3067 &symbol_present
, &var_present
, &offset
,
3070 else if (ratio
== 1)
3072 cost
+= difference_cost (data
,
3074 &symbol_present
, &var_present
, &offset
,
3079 cost
+= force_var_cost (data
, cbase
, depends_on
);
3080 cost
+= add_cost (TYPE_MODE (ctype
));
3081 cost
+= difference_cost (data
,
3082 ubase
, integer_zero_node
,
3083 &symbol_present
, &var_present
, &offset
,
3087 /* If we are after the increment, the value of the candidate is higher by
3089 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3090 offset
-= ratio
* cstepi
;
3092 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3093 (symbol/var/const parts may be omitted). If we are looking for an address,
3094 find the cost of addressing this. */
3096 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3098 /* Otherwise estimate the costs for computing the expression. */
3099 aratio
= ratio
> 0 ? ratio
: -ratio
;
3100 if (!symbol_present
&& !var_present
&& !offset
)
3103 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3109 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3113 /* Symbol + offset should be compile-time computable. */
3114 && (symbol_present
|| offset
))
3117 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3121 /* Just get the expression, expand it and measure the cost. */
3122 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3128 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3130 return computation_cost (comp
);
3134 /* Determines the cost of the computation by that USE is expressed
3135 from induction variable CAND. If ADDRESS_P is true, we just need
3136 to create an address from it, otherwise we want to get it into
3137 register. A set of invariants we depend on is stored in
3141 get_computation_cost (struct ivopts_data
*data
,
3142 struct iv_use
*use
, struct iv_cand
*cand
,
3143 bool address_p
, bitmap
*depends_on
)
3145 return get_computation_cost_at (data
,
3146 use
, cand
, address_p
, depends_on
, use
->stmt
);
3149 /* Determines cost of basing replacement of USE on CAND in a generic
3153 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3154 struct iv_use
*use
, struct iv_cand
*cand
)
3159 /* The simple case first -- if we need to express value of the preserved
3160 original biv, the cost is 0. This also prevents us from counting the
3161 cost of increment twice -- once at this use and once in the cost of
3163 if (cand
->pos
== IP_ORIGINAL
3164 && cand
->incremented_at
== use
->stmt
)
3166 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3170 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3171 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3173 return cost
!= INFTY
;
3176 /* Determines cost of basing replacement of USE on CAND in an address. */
3179 determine_use_iv_cost_address (struct ivopts_data
*data
,
3180 struct iv_use
*use
, struct iv_cand
*cand
)
3183 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3185 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3187 return cost
!= INFTY
;
3190 /* Computes value of induction variable IV in iteration NITER. */
3193 iv_value (struct iv
*iv
, tree niter
)
3196 tree type
= TREE_TYPE (iv
->base
);
3198 niter
= fold_convert (type
, niter
);
3199 val
= fold (build2 (MULT_EXPR
, type
, iv
->step
, niter
));
3201 return fold (build2 (PLUS_EXPR
, type
, iv
->base
, val
));
3204 /* Computes value of candidate CAND at position AT in iteration NITER. */
3207 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3209 tree val
= iv_value (cand
->iv
, niter
);
3210 tree type
= TREE_TYPE (cand
->iv
->base
);
3212 if (stmt_after_increment (loop
, cand
, at
))
3213 val
= fold (build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
));
3218 /* Check whether it is possible to express the condition in USE by comparison
3219 of candidate CAND. If so, store the comparison code to COMPARE and the
3220 value compared with to BOUND. */
3223 may_eliminate_iv (struct loop
*loop
,
3224 struct iv_use
*use
, struct iv_cand
*cand
,
3225 enum tree_code
*compare
, tree
*bound
)
3229 struct tree_niter_desc niter
, new_niter
;
3230 tree wider_type
, type
, base
;
3232 /* For now works only for exits that dominate the loop latch. TODO -- extend
3233 for other conditions inside loop body. */
3234 ex_bb
= bb_for_stmt (use
->stmt
);
3235 if (use
->stmt
!= last_stmt (ex_bb
)
3236 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3238 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3241 exit
= EDGE_SUCC (ex_bb
, 0);
3242 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3243 exit
= EDGE_SUCC (ex_bb
, 1);
3244 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3247 niter
.niter
= NULL_TREE
;
3248 number_of_iterations_exit (loop
, exit
, &niter
);
3250 || !integer_nonzerop (niter
.assumptions
)
3251 || !integer_zerop (niter
.may_be_zero
))
3254 if (exit
->flags
& EDGE_TRUE_VALUE
)
3259 *bound
= cand_value_at (loop
, cand
, use
->stmt
, niter
.niter
);
3261 /* Let us check there is not some problem with overflows, by checking that
3262 the number of iterations is unchanged. */
3263 base
= cand
->iv
->base
;
3264 type
= TREE_TYPE (base
);
3265 if (stmt_after_increment (loop
, cand
, use
->stmt
))
3266 base
= fold (build2 (PLUS_EXPR
, type
, base
, cand
->iv
->step
));
3268 new_niter
.niter
= NULL_TREE
;
3269 number_of_iterations_cond (TREE_TYPE (cand
->iv
->base
), base
,
3270 cand
->iv
->step
, NE_EXPR
, *bound
, NULL_TREE
,
3272 if (!new_niter
.niter
3273 || !integer_nonzerop (new_niter
.assumptions
)
3274 || !integer_zerop (new_niter
.may_be_zero
))
3277 wider_type
= TREE_TYPE (new_niter
.niter
);
3278 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (TREE_TYPE (niter
.niter
)))
3279 wider_type
= TREE_TYPE (niter
.niter
);
3280 if (!operand_equal_p (fold_convert (wider_type
, niter
.niter
),
3281 fold_convert (wider_type
, new_niter
.niter
), 0))
3287 /* Determines cost of basing replacement of USE on CAND in a condition. */
3290 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3291 struct iv_use
*use
, struct iv_cand
*cand
)
3294 enum tree_code compare
;
3296 /* Only consider real candidates. */
3299 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3303 if (may_eliminate_iv (data
->current_loop
, use
, cand
, &compare
, &bound
))
3305 bitmap depends_on
= NULL
;
3306 unsigned cost
= force_var_cost (data
, bound
, &depends_on
);
3308 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3309 return cost
!= INFTY
;
3312 /* The induction variable elimination failed; just express the original
3313 giv. If it is compared with an invariant, note that we cannot get
3315 if (TREE_CODE (*use
->op_p
) == SSA_NAME
)
3316 record_invariant (data
, *use
->op_p
, true);
3319 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 0), true);
3320 record_invariant (data
, TREE_OPERAND (*use
->op_p
, 1), true);
3323 return determine_use_iv_cost_generic (data
, use
, cand
);
3326 /* Checks whether it is possible to replace the final value of USE by
3327 a direct computation. If so, the formula is stored to *VALUE. */
3330 may_replace_final_value (struct loop
*loop
, struct iv_use
*use
, tree
*value
)
3333 struct tree_niter_desc
*niter
;
3335 exit
= single_dom_exit (loop
);
3339 gcc_assert (dominated_by_p (CDI_DOMINATORS
, exit
->src
,
3340 bb_for_stmt (use
->stmt
)));
3342 niter
= &loop_data (loop
)->niter
;
3344 || !operand_equal_p (niter
->assumptions
, boolean_true_node
, 0)
3345 || !operand_equal_p (niter
->may_be_zero
, boolean_false_node
, 0))
3348 *value
= iv_value (use
->iv
, niter
->niter
);
3353 /* Determines cost of replacing final value of USE using CAND. */
3356 determine_use_iv_cost_outer (struct ivopts_data
*data
,
3357 struct iv_use
*use
, struct iv_cand
*cand
)
3363 struct loop
*loop
= data
->current_loop
;
3365 /* The simple case first -- if we need to express value of the preserved
3366 original biv, the cost is 0. This also prevents us from counting the
3367 cost of increment twice -- once at this use and once in the cost of
3369 if (cand
->pos
== IP_ORIGINAL
3370 && cand
->incremented_at
== use
->stmt
)
3372 set_use_iv_cost (data
, use
, cand
, 0, NULL
);
3378 if (!may_replace_final_value (loop
, use
, &value
))
3380 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
);
3385 cost
= force_var_cost (data
, value
, &depends_on
);
3387 cost
/= AVG_LOOP_NITER (loop
);
3389 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3390 return cost
!= INFTY
;
3393 exit
= single_dom_exit (loop
);
3396 /* If there is just a single exit, we may use value of the candidate
3397 after we take it to determine the value of use. */
3398 cost
= get_computation_cost_at (data
, use
, cand
, false, &depends_on
,
3399 last_stmt (exit
->src
));
3401 cost
/= AVG_LOOP_NITER (loop
);
3405 /* Otherwise we just need to compute the iv. */
3406 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3409 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
);
3411 return cost
!= INFTY
;
3414 /* Determines cost of basing replacement of USE on CAND. Returns false
3415 if USE cannot be based on CAND. */
3418 determine_use_iv_cost (struct ivopts_data
*data
,
3419 struct iv_use
*use
, struct iv_cand
*cand
)
3423 case USE_NONLINEAR_EXPR
:
3424 return determine_use_iv_cost_generic (data
, use
, cand
);
3427 return determine_use_iv_cost_outer (data
, use
, cand
);
3430 return determine_use_iv_cost_address (data
, use
, cand
);
3433 return determine_use_iv_cost_condition (data
, use
, cand
);
3440 /* Determines costs of basing the use of the iv on an iv candidate. */
3443 determine_use_iv_costs (struct ivopts_data
*data
)
3447 struct iv_cand
*cand
;
3448 bitmap to_clear
= BITMAP_XMALLOC ();
3450 alloc_use_cost_map (data
);
3452 for (i
= 0; i
< n_iv_uses (data
); i
++)
3454 use
= iv_use (data
, i
);
3456 if (data
->consider_all_candidates
)
3458 for (j
= 0; j
< n_iv_cands (data
); j
++)
3460 cand
= iv_cand (data
, j
);
3461 determine_use_iv_cost (data
, use
, cand
);
3468 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
3470 cand
= iv_cand (data
, j
);
3471 if (!determine_use_iv_cost (data
, use
, cand
))
3472 bitmap_set_bit (to_clear
, j
);
3475 /* Remove the candidates for that the cost is infinite from
3476 the list of related candidates. */
3477 bitmap_and_compl_into (use
->related_cands
, to_clear
);
3478 bitmap_clear (to_clear
);
3482 BITMAP_XFREE (to_clear
);
3484 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3486 fprintf (dump_file
, "Use-candidate costs:\n");
3488 for (i
= 0; i
< n_iv_uses (data
); i
++)
3490 use
= iv_use (data
, i
);
3492 fprintf (dump_file
, "Use %d:\n", i
);
3493 fprintf (dump_file
, " cand\tcost\tdepends on\n");
3494 for (j
= 0; j
< use
->n_map_members
; j
++)
3496 if (!use
->cost_map
[j
].cand
3497 || use
->cost_map
[j
].cost
== INFTY
)
3500 fprintf (dump_file
, " %d\t%d\t",
3501 use
->cost_map
[j
].cand
->id
,
3502 use
->cost_map
[j
].cost
);
3503 if (use
->cost_map
[j
].depends_on
)
3504 bitmap_print (dump_file
,
3505 use
->cost_map
[j
].depends_on
, "","");
3506 fprintf (dump_file
, "\n");
3509 fprintf (dump_file
, "\n");
3511 fprintf (dump_file
, "\n");
3515 /* Determines cost of the candidate CAND. */
3518 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
3520 unsigned cost_base
, cost_step
;
3530 /* There are two costs associated with the candidate -- its increment
3531 and its initialization. The second is almost negligible for any loop
3532 that rolls enough, so we take it just very little into account. */
3534 base
= cand
->iv
->base
;
3535 cost_base
= force_var_cost (data
, base
, NULL
);
3536 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
3538 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
3540 /* Prefer the original iv unless we may gain something by replacing it. */
3541 if (cand
->pos
== IP_ORIGINAL
)
3544 /* Prefer not to insert statements into latch unless there are some
3545 already (so that we do not create unnecessary jumps). */
3546 if (cand
->pos
== IP_END
)
3548 bb
= ip_end_pos (data
->current_loop
);
3549 last
= last_stmt (bb
);
3552 || TREE_CODE (last
) == LABEL_EXPR
)
3557 /* Determines costs of computation of the candidates. */
3560 determine_iv_costs (struct ivopts_data
*data
)
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 fprintf (dump_file
, "Candidate costs:\n");
3567 fprintf (dump_file
, " cand\tcost\n");
3570 for (i
= 0; i
< n_iv_cands (data
); i
++)
3572 struct iv_cand
*cand
= iv_cand (data
, i
);
3574 determine_iv_cost (data
, cand
);
3576 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3577 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
3580 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3581 fprintf (dump_file
, "\n");
3584 /* Calculates cost for having SIZE induction variables. */
3587 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
3589 return global_cost_for_size (size
,
3590 loop_data (data
->current_loop
)->regs_used
,
3594 /* For each size of the induction variable set determine the penalty. */
3597 determine_set_costs (struct ivopts_data
*data
)
3601 struct loop
*loop
= data
->current_loop
;
3604 /* We use the following model (definitely improvable, especially the
3605 cost function -- TODO):
3607 We estimate the number of registers available (using MD data), name it A.
3609 We estimate the number of registers used by the loop, name it U. This
3610 number is obtained as the number of loop phi nodes (not counting virtual
3611 registers and bivs) + the number of variables from outside of the loop.
3613 We set a reserve R (free regs that are used for temporary computations,
3614 etc.). For now the reserve is a constant 3.
3616 Let I be the number of induction variables.
3618 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3619 make a lot of ivs without a reason).
3620 -- if A - R < U + I <= A, the cost is I * PRES_COST
3621 -- if U + I > A, the cost is I * PRES_COST and
3622 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3624 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3626 fprintf (dump_file
, "Global costs:\n");
3627 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
3628 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
3629 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
3630 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
3634 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
3636 op
= PHI_RESULT (phi
);
3638 if (!is_gimple_reg (op
))
3641 if (get_iv (data
, op
))
3647 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
3649 struct version_info
*info
= ver_info (data
, j
);
3651 if (info
->inv_id
&& info
->has_nonlin_use
)
3655 loop_data (loop
)->regs_used
= n
;
3656 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3657 fprintf (dump_file
, " regs_used %d\n", n
);
3659 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3661 fprintf (dump_file
, " cost for size:\n");
3662 fprintf (dump_file
, " ivs\tcost\n");
3663 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
3664 fprintf (dump_file
, " %d\t%d\n", j
,
3665 ivopts_global_cost_for_size (data
, j
));
3666 fprintf (dump_file
, "\n");
3670 /* Returns true if A is a cheaper cost pair than B. */
3673 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
3681 if (a
->cost
< b
->cost
)
3684 if (a
->cost
> b
->cost
)
3687 /* In case the costs are the same, prefer the cheaper candidate. */
3688 if (a
->cand
->cost
< b
->cand
->cost
)
3694 /* Computes the cost field of IVS structure. */
3697 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
3701 cost
+= ivs
->cand_use_cost
;
3702 cost
+= ivs
->cand_cost
;
3703 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
3708 /* Set USE not to be expressed by any candidate in IVS. */
3711 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3714 unsigned uid
= use
->id
, cid
, iid
;
3716 struct cost_pair
*cp
;
3719 cp
= ivs
->cand_for_use
[uid
];
3725 ivs
->cand_for_use
[uid
] = NULL
;
3726 ivs
->n_cand_uses
[cid
]--;
3728 if (ivs
->n_cand_uses
[cid
] == 0)
3730 bitmap_clear_bit (ivs
->cands
, cid
);
3731 /* Do not count the pseudocandidates. */
3735 ivs
->cand_cost
-= cp
->cand
->cost
;
3738 ivs
->cand_use_cost
-= cp
->cost
;
3740 deps
= cp
->depends_on
;
3744 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3746 ivs
->n_invariant_uses
[iid
]--;
3747 if (ivs
->n_invariant_uses
[iid
] == 0)
3752 iv_ca_recount_cost (data
, ivs
);
3755 /* Set cost pair for USE in set IVS to CP. */
3758 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3759 struct iv_use
*use
, struct cost_pair
*cp
)
3761 unsigned uid
= use
->id
, cid
, iid
;
3765 if (ivs
->cand_for_use
[uid
] == cp
)
3768 if (ivs
->cand_for_use
[uid
])
3769 iv_ca_set_no_cp (data
, ivs
, use
);
3776 ivs
->cand_for_use
[uid
] = cp
;
3777 ivs
->n_cand_uses
[cid
]++;
3778 if (ivs
->n_cand_uses
[cid
] == 1)
3780 bitmap_set_bit (ivs
->cands
, cid
);
3781 /* Do not count the pseudocandidates. */
3785 ivs
->cand_cost
+= cp
->cand
->cost
;
3788 ivs
->cand_use_cost
+= cp
->cost
;
3790 deps
= cp
->depends_on
;
3794 EXECUTE_IF_SET_IN_BITMAP (deps
, 0, iid
, bi
)
3796 ivs
->n_invariant_uses
[iid
]++;
3797 if (ivs
->n_invariant_uses
[iid
] == 1)
3802 iv_ca_recount_cost (data
, ivs
);
3806 /* Extend set IVS by expressing USE by some of the candidates in it
3810 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3813 struct cost_pair
*best_cp
= NULL
, *cp
;
3817 gcc_assert (ivs
->upto
>= use
->id
);
3819 if (ivs
->upto
== use
->id
)
3825 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
3827 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
3829 if (cheaper_cost_pair (cp
, best_cp
))
3833 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
3836 /* Get cost for assignment IVS. */
3839 iv_ca_cost (struct iv_ca
*ivs
)
3841 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
3844 /* Returns true if all dependences of CP are among invariants in IVS. */
3847 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
3852 if (!cp
->depends_on
)
3855 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
3857 if (ivs
->n_invariant_uses
[i
] == 0)
3864 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3865 it before NEXT_CHANGE. */
3867 static struct iv_ca_delta
*
3868 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
3869 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
3871 struct iv_ca_delta
*change
= xmalloc (sizeof (struct iv_ca_delta
));
3874 change
->old_cp
= old_cp
;
3875 change
->new_cp
= new_cp
;
3876 change
->next_change
= next_change
;
3881 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3884 static struct iv_ca_delta
*
3885 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
3887 struct iv_ca_delta
*last
;
3895 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
3897 last
->next_change
= l2
;
3902 /* Returns candidate by that USE is expressed in IVS. */
3904 static struct cost_pair
*
3905 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
3907 return ivs
->cand_for_use
[use
->id
];
3910 /* Reverse the list of changes DELTA, forming the inverse to it. */
3912 static struct iv_ca_delta
*
3913 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
3915 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
3916 struct cost_pair
*tmp
;
3918 for (act
= delta
; act
; act
= next
)
3920 next
= act
->next_change
;
3921 act
->next_change
= prev
;
3925 act
->old_cp
= act
->new_cp
;
3932 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3933 reverted instead. */
3936 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
3937 struct iv_ca_delta
*delta
, bool forward
)
3939 struct cost_pair
*from
, *to
;
3940 struct iv_ca_delta
*act
;
3943 delta
= iv_ca_delta_reverse (delta
);
3945 for (act
= delta
; act
; act
= act
->next_change
)
3949 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
3950 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
3954 iv_ca_delta_reverse (delta
);
3957 /* Returns true if CAND is used in IVS. */
3960 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
3962 return ivs
->n_cand_uses
[cand
->id
] > 0;
3965 /* Returns number of induction variable candidates in the set IVS. */
3968 iv_ca_n_cands (struct iv_ca
*ivs
)
3970 return ivs
->n_cands
;
3973 /* Free the list of changes DELTA. */
3976 iv_ca_delta_free (struct iv_ca_delta
**delta
)
3978 struct iv_ca_delta
*act
, *next
;
3980 for (act
= *delta
; act
; act
= next
)
3982 next
= act
->next_change
;
3989 /* Allocates new iv candidates assignment. */
3991 static struct iv_ca
*
3992 iv_ca_new (struct ivopts_data
*data
)
3994 struct iv_ca
*nw
= xmalloc (sizeof (struct iv_ca
));
3998 nw
->cand_for_use
= xcalloc (n_iv_uses (data
), sizeof (struct cost_pair
*));
3999 nw
->n_cand_uses
= xcalloc (n_iv_cands (data
), sizeof (unsigned));
4000 nw
->cands
= BITMAP_XMALLOC ();
4003 nw
->cand_use_cost
= 0;
4005 nw
->n_invariant_uses
= xcalloc (data
->max_inv_id
+ 1, sizeof (unsigned));
4011 /* Free memory occupied by the set IVS. */
4014 iv_ca_free (struct iv_ca
**ivs
)
4016 free ((*ivs
)->cand_for_use
);
4017 free ((*ivs
)->n_cand_uses
);
4018 BITMAP_XFREE ((*ivs
)->cands
);
4019 free ((*ivs
)->n_invariant_uses
);
4024 /* Dumps IVS to FILE. */
4027 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4029 const char *pref
= " invariants ";
4032 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4033 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4035 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4036 if (ivs
->n_invariant_uses
[i
])
4038 fprintf (file
, "%s%d", pref
, i
);
4041 fprintf (file
, "\n");
4044 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4045 new set, and store differences in DELTA. Number of induction variables
4046 in the new set is stored to N_IVS. */
4049 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4050 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4055 struct cost_pair
*old_cp
, *new_cp
;
4058 for (i
= 0; i
< ivs
->upto
; i
++)
4060 use
= iv_use (data
, i
);
4061 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4064 && old_cp
->cand
== cand
)
4067 new_cp
= get_use_iv_cost (data
, use
, cand
);
4071 if (!iv_ca_has_deps (ivs
, new_cp
))
4074 if (!cheaper_cost_pair (new_cp
, old_cp
))
4077 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4080 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4081 cost
= iv_ca_cost (ivs
);
4083 *n_ivs
= iv_ca_n_cands (ivs
);
4084 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4089 /* Try narrowing set IVS by removing CAND. Return the cost of
4090 the new set and store the differences in DELTA. */
4093 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4094 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4098 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4100 struct iv_cand
*cnd
;
4104 for (i
= 0; i
< n_iv_uses (data
); i
++)
4106 use
= iv_use (data
, i
);
4108 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4109 if (old_cp
->cand
!= cand
)
4114 if (data
->consider_all_candidates
)
4116 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4121 cnd
= iv_cand (data
, ci
);
4123 cp
= get_use_iv_cost (data
, use
, cnd
);
4126 if (!iv_ca_has_deps (ivs
, cp
))
4129 if (!cheaper_cost_pair (cp
, new_cp
))
4137 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4142 cnd
= iv_cand (data
, ci
);
4144 cp
= get_use_iv_cost (data
, use
, cnd
);
4147 if (!iv_ca_has_deps (ivs
, cp
))
4150 if (!cheaper_cost_pair (cp
, new_cp
))
4159 iv_ca_delta_free (delta
);
4163 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4166 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4167 cost
= iv_ca_cost (ivs
);
4168 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4173 /* Try optimizing the set of candidates IVS by removing candidates different
4174 from to EXCEPT_CAND from it. Return cost of the new set, and store
4175 differences in DELTA. */
4178 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4179 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4182 struct iv_ca_delta
*act_delta
, *best_delta
;
4183 unsigned i
, best_cost
, acost
;
4184 struct iv_cand
*cand
;
4187 best_cost
= iv_ca_cost (ivs
);
4189 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4191 cand
= iv_cand (data
, i
);
4193 if (cand
== except_cand
)
4196 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4198 if (acost
< best_cost
)
4201 iv_ca_delta_free (&best_delta
);
4202 best_delta
= act_delta
;
4205 iv_ca_delta_free (&act_delta
);
4214 /* Recurse to possibly remove other unnecessary ivs. */
4215 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4216 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4217 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4218 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4222 /* Tries to extend the sets IVS in the best possible way in order
4223 to express the USE. */
4226 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4229 unsigned best_cost
, act_cost
;
4232 struct iv_cand
*cand
;
4233 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4234 struct cost_pair
*cp
;
4236 iv_ca_add_use (data
, ivs
, use
);
4237 best_cost
= iv_ca_cost (ivs
);
4239 cp
= iv_ca_cand_for_use (ivs
, use
);
4242 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4243 iv_ca_set_no_cp (data
, ivs
, use
);
4246 /* First try important candidates. Only if it fails, try the specific ones.
4247 Rationale -- in loops with many variables the best choice often is to use
4248 just one generic biv. If we added here many ivs specific to the uses,
4249 the optimization algorithm later would be likely to get stuck in a local
4250 minimum, thus causing us to create too many ivs. The approach from
4251 few ivs to more seems more likely to be successful -- starting from few
4252 ivs, replacing an expensive use by a specific iv should always be a
4254 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4256 cand
= iv_cand (data
, i
);
4258 if (iv_ca_cand_used_p (ivs
, cand
))
4261 cp
= get_use_iv_cost (data
, use
, cand
);
4265 iv_ca_set_cp (data
, ivs
, use
, cp
);
4266 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4267 iv_ca_set_no_cp (data
, ivs
, use
);
4268 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4270 if (act_cost
< best_cost
)
4272 best_cost
= act_cost
;
4274 iv_ca_delta_free (&best_delta
);
4275 best_delta
= act_delta
;
4278 iv_ca_delta_free (&act_delta
);
4281 if (best_cost
== INFTY
)
4283 for (i
= 0; i
< use
->n_map_members
; i
++)
4285 cp
= use
->cost_map
+ i
;
4290 /* Already tried this. */
4291 if (cand
->important
)
4294 if (iv_ca_cand_used_p (ivs
, cand
))
4298 iv_ca_set_cp (data
, ivs
, use
, cp
);
4299 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4300 iv_ca_set_no_cp (data
, ivs
, use
);
4301 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4304 if (act_cost
< best_cost
)
4306 best_cost
= act_cost
;
4309 iv_ca_delta_free (&best_delta
);
4310 best_delta
= act_delta
;
4313 iv_ca_delta_free (&act_delta
);
4317 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4318 iv_ca_delta_free (&best_delta
);
4320 return (best_cost
!= INFTY
);
4323 /* Finds an initial assignment of candidates to uses. */
4325 static struct iv_ca
*
4326 get_initial_solution (struct ivopts_data
*data
)
4328 struct iv_ca
*ivs
= iv_ca_new (data
);
4331 for (i
= 0; i
< n_iv_uses (data
); i
++)
4332 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4341 /* Tries to improve set of induction variables IVS. */
4344 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4346 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4347 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4348 struct iv_cand
*cand
;
4350 /* Try extending the set of induction variables by one. */
4351 for (i
= 0; i
< n_iv_cands (data
); i
++)
4353 cand
= iv_cand (data
, i
);
4355 if (iv_ca_cand_used_p (ivs
, cand
))
4358 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4362 /* If we successfully added the candidate and the set is small enough,
4363 try optimizing it by removing other candidates. */
4364 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
4366 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
4367 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
4368 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
4369 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
4372 if (acost
< best_cost
)
4375 iv_ca_delta_free (&best_delta
);
4376 best_delta
= act_delta
;
4379 iv_ca_delta_free (&act_delta
);
4384 /* Try removing the candidates from the set instead. */
4385 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
4387 /* Nothing more we can do. */
4392 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4393 gcc_assert (best_cost
== iv_ca_cost (ivs
));
4394 iv_ca_delta_free (&best_delta
);
4398 /* Attempts to find the optimal set of induction variables. We do simple
4399 greedy heuristic -- we try to replace at most one candidate in the selected
4400 solution and remove the unused ivs while this improves the cost. */
4402 static struct iv_ca
*
4403 find_optimal_iv_set (struct ivopts_data
*data
)
4409 /* Get the initial solution. */
4410 set
= get_initial_solution (data
);
4413 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4414 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
4418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4420 fprintf (dump_file
, "Initial set of candidates:\n");
4421 iv_ca_dump (data
, dump_file
, set
);
4424 while (try_improve_iv_set (data
, set
))
4426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4428 fprintf (dump_file
, "Improved to:\n");
4429 iv_ca_dump (data
, dump_file
, set
);
4433 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4434 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
4436 for (i
= 0; i
< n_iv_uses (data
); i
++)
4438 use
= iv_use (data
, i
);
4439 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
4445 /* Creates a new induction variable corresponding to CAND. */
4448 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
4450 block_stmt_iterator incr_pos
;
4460 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
4464 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
4469 /* Mark that the iv is preserved. */
4470 name_info (data
, cand
->var_before
)->preserve_biv
= true;
4471 name_info (data
, cand
->var_after
)->preserve_biv
= true;
4473 /* Rewrite the increment so that it uses var_before directly. */
4474 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
4479 gimple_add_tmp_var (cand
->var_before
);
4480 add_referenced_tmp_var (cand
->var_before
);
4482 base
= unshare_expr (cand
->iv
->base
);
4484 create_iv (base
, cand
->iv
->step
, cand
->var_before
, data
->current_loop
,
4485 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
4488 /* Creates new induction variables described in SET. */
4491 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
4494 struct iv_cand
*cand
;
4497 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
4499 cand
= iv_cand (data
, i
);
4500 create_new_iv (data
, cand
);
4504 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4505 is true, remove also the ssa name defined by the statement. */
4508 remove_statement (tree stmt
, bool including_defined_name
)
4510 if (TREE_CODE (stmt
) == PHI_NODE
)
4512 if (!including_defined_name
)
4514 /* Prevent the ssa name defined by the statement from being removed. */
4515 SET_PHI_RESULT (stmt
, NULL
);
4517 remove_phi_node (stmt
, NULL_TREE
, bb_for_stmt (stmt
));
4521 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
4527 /* Rewrites USE (definition of iv used in a nonlinear expression)
4528 using candidate CAND. */
4531 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
4532 struct iv_use
*use
, struct iv_cand
*cand
)
4534 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4536 tree op
, stmts
, tgt
, ass
;
4537 block_stmt_iterator bsi
, pbsi
;
4539 switch (TREE_CODE (use
->stmt
))
4542 tgt
= PHI_RESULT (use
->stmt
);
4544 /* If we should keep the biv, do not replace it. */
4545 if (name_info (data
, tgt
)->preserve_biv
)
4548 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
4549 while (!bsi_end_p (pbsi
)
4550 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
4558 tgt
= TREE_OPERAND (use
->stmt
, 0);
4559 bsi
= bsi_for_stmt (use
->stmt
);
4566 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
4568 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
4571 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4572 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
4573 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
4574 remove_statement (use
->stmt
, false);
4575 SSA_NAME_DEF_STMT (tgt
) = ass
;
4580 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4581 TREE_OPERAND (use
->stmt
, 1) = op
;
4585 /* Replaces ssa name in index IDX by its basic variable. Callback for
4589 idx_remove_ssa_names (tree base
, tree
*idx
,
4590 void *data ATTRIBUTE_UNUSED
)
4594 if (TREE_CODE (*idx
) == SSA_NAME
)
4595 *idx
= SSA_NAME_VAR (*idx
);
4597 if (TREE_CODE (base
) == ARRAY_REF
)
4599 op
= &TREE_OPERAND (base
, 2);
4601 && TREE_CODE (*op
) == SSA_NAME
)
4602 *op
= SSA_NAME_VAR (*op
);
4603 op
= &TREE_OPERAND (base
, 3);
4605 && TREE_CODE (*op
) == SSA_NAME
)
4606 *op
= SSA_NAME_VAR (*op
);
4612 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4615 unshare_and_remove_ssa_names (tree ref
)
4617 ref
= unshare_expr (ref
);
4618 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
4623 /* Rewrites base of memory access OP with expression WITH in statement
4624 pointed to by BSI. */
4627 rewrite_address_base (block_stmt_iterator
*bsi
, tree
*op
, tree with
)
4629 tree bvar
, var
, new_var
, new_name
, copy
, name
;
4632 var
= bvar
= get_base_address (*op
);
4634 if (!var
|| TREE_CODE (with
) != SSA_NAME
)
4637 gcc_assert (TREE_CODE (var
) != ALIGN_INDIRECT_REF
);
4638 gcc_assert (TREE_CODE (var
) != MISALIGNED_INDIRECT_REF
);
4639 if (TREE_CODE (var
) == INDIRECT_REF
)
4640 var
= TREE_OPERAND (var
, 0);
4641 if (TREE_CODE (var
) == SSA_NAME
)
4644 var
= SSA_NAME_VAR (var
);
4646 else if (DECL_P (var
))
4651 if (var_ann (var
)->type_mem_tag
)
4652 var
= var_ann (var
)->type_mem_tag
;
4654 /* We need to add a memory tag for the variable. But we do not want
4655 to add it to the temporary used for the computations, since this leads
4656 to problems in redundancy elimination when there are common parts
4657 in two computations referring to the different arrays. So we copy
4658 the variable to a new temporary. */
4659 copy
= build2 (MODIFY_EXPR
, void_type_node
, NULL_TREE
, with
);
4661 new_name
= duplicate_ssa_name (name
, copy
);
4664 new_var
= create_tmp_var (TREE_TYPE (with
), "ruatmp");
4665 add_referenced_tmp_var (new_var
);
4666 var_ann (new_var
)->type_mem_tag
= var
;
4667 new_name
= make_ssa_name (new_var
, copy
);
4669 TREE_OPERAND (copy
, 0) = new_name
;
4670 bsi_insert_before (bsi
, copy
, BSI_SAME_STMT
);
4676 gcc_assert (TREE_CODE (*op
) != ALIGN_INDIRECT_REF
);
4677 gcc_assert (TREE_CODE (*op
) != MISALIGNED_INDIRECT_REF
);
4679 if (TREE_CODE (*op
) == INDIRECT_REF
)
4680 orig
= REF_ORIGINAL (*op
);
4682 orig
= unshare_and_remove_ssa_names (*op
);
4684 *op
= build1 (INDIRECT_REF
, TREE_TYPE (*op
), with
);
4686 /* Record the original reference, for purposes of alias analysis. */
4687 REF_ORIGINAL (*op
) = orig
;
4690 /* Rewrites USE (address that is an iv) using candidate CAND. */
4693 rewrite_use_address (struct ivopts_data
*data
,
4694 struct iv_use
*use
, struct iv_cand
*cand
)
4696 tree comp
= unshare_expr (get_computation (data
->current_loop
,
4698 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4700 tree op
= force_gimple_operand (comp
, &stmts
, true, NULL_TREE
);
4703 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4705 rewrite_address_base (&bsi
, use
->op_p
, op
);
4708 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4712 rewrite_use_compare (struct ivopts_data
*data
,
4713 struct iv_use
*use
, struct iv_cand
*cand
)
4716 tree
*op_p
, cond
, op
, stmts
, bound
;
4717 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
4718 enum tree_code compare
;
4720 if (may_eliminate_iv (data
->current_loop
,
4721 use
, cand
, &compare
, &bound
))
4723 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
4727 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4729 *use
->op_p
= build2 (compare
, boolean_type_node
,
4730 var_at_stmt (data
->current_loop
,
4731 cand
, use
->stmt
), op
);
4732 modify_stmt (use
->stmt
);
4736 /* The induction variable elimination failed; just express the original
4738 comp
= unshare_expr (get_computation (data
->current_loop
, use
, cand
));
4741 op_p
= &TREE_OPERAND (cond
, 0);
4742 if (TREE_CODE (*op_p
) != SSA_NAME
4743 || zero_p (get_iv (data
, *op_p
)->step
))
4744 op_p
= &TREE_OPERAND (cond
, 1);
4746 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
4748 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4753 /* Ensure that operand *OP_P may be used at the end of EXIT without
4754 violating loop closed ssa form. */
4757 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
4760 struct loop
*def_loop
;
4763 use
= USE_FROM_PTR (op_p
);
4764 if (TREE_CODE (use
) != SSA_NAME
)
4767 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
4771 def_loop
= def_bb
->loop_father
;
4772 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
4775 /* Try finding a phi node that copies the value out of the loop. */
4776 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4777 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
4782 /* Create such a phi node. */
4783 tree new_name
= duplicate_ssa_name (use
, NULL
);
4785 phi
= create_phi_node (new_name
, exit
->dest
);
4786 SSA_NAME_DEF_STMT (new_name
) = phi
;
4787 add_phi_arg (phi
, use
, exit
);
4790 SET_USE (op_p
, PHI_RESULT (phi
));
4793 /* Ensure that operands of STMT may be used at the end of EXIT without
4794 violating loop closed ssa form. */
4797 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
4801 v_may_def_optype v_may_defs
;
4804 get_stmt_operands (stmt
);
4806 uses
= STMT_USE_OPS (stmt
);
4807 for (i
= 0; i
< NUM_USES (uses
); i
++)
4808 protect_loop_closed_ssa_form_use (exit
, USE_OP_PTR (uses
, i
));
4810 vuses
= STMT_VUSE_OPS (stmt
);
4811 for (i
= 0; i
< NUM_VUSES (vuses
); i
++)
4812 protect_loop_closed_ssa_form_use (exit
, VUSE_OP_PTR (vuses
, i
));
4814 v_may_defs
= STMT_V_MAY_DEF_OPS (stmt
);
4815 for (i
= 0; i
< NUM_V_MAY_DEFS (v_may_defs
); i
++)
4816 protect_loop_closed_ssa_form_use (exit
, V_MAY_DEF_OP_PTR (v_may_defs
, i
));
4819 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4820 so that they are emitted on the correct place, and so that the loop closed
4821 ssa form is preserved. */
4824 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
4826 tree_stmt_iterator tsi
;
4827 block_stmt_iterator bsi
;
4828 tree phi
, stmt
, def
, next
;
4830 if (EDGE_COUNT (exit
->dest
->preds
) > 1)
4831 split_loop_exit_edge (exit
);
4833 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
4835 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
4836 protect_loop_closed_ssa_form (exit
, tsi_stmt (tsi
));
4839 protect_loop_closed_ssa_form (exit
, stmts
);
4841 /* Ensure there is label in exit->dest, so that we can
4843 tree_block_label (exit
->dest
);
4844 bsi
= bsi_after_labels (exit
->dest
);
4845 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
4850 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
4852 next
= PHI_CHAIN (phi
);
4854 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
4856 def
= PHI_RESULT (phi
);
4857 remove_statement (phi
, false);
4858 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
4860 SSA_NAME_DEF_STMT (def
) = stmt
;
4861 bsi_insert_after (&bsi
, stmt
, BSI_CONTINUE_LINKING
);
4866 /* Rewrites the final value of USE (that is only needed outside of the loop)
4867 using candidate CAND. */
4870 rewrite_use_outer (struct ivopts_data
*data
,
4871 struct iv_use
*use
, struct iv_cand
*cand
)
4874 tree value
, op
, stmts
, tgt
;
4877 switch (TREE_CODE (use
->stmt
))
4880 tgt
= PHI_RESULT (use
->stmt
);
4883 tgt
= TREE_OPERAND (use
->stmt
, 0);
4889 exit
= single_dom_exit (data
->current_loop
);
4895 bool ok
= may_replace_final_value (data
->current_loop
, use
, &value
);
4899 value
= get_computation_at (data
->current_loop
,
4900 use
, cand
, last_stmt (exit
->src
));
4902 value
= unshare_expr (value
);
4903 op
= force_gimple_operand (value
, &stmts
, true, SSA_NAME_VAR (tgt
));
4905 /* If we will preserve the iv anyway and we would need to perform
4906 some computation to replace the final value, do nothing. */
4907 if (stmts
&& name_info (data
, tgt
)->preserve_biv
)
4910 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
4912 use_operand_p use_p
= PHI_ARG_DEF_PTR_FROM_EDGE (phi
, exit
);
4914 if (USE_FROM_PTR (use_p
) == tgt
)
4915 SET_USE (use_p
, op
);
4919 compute_phi_arg_on_exit (exit
, stmts
, op
);
4921 /* Enable removal of the statement. We cannot remove it directly,
4922 since we may still need the aliasing information attached to the
4923 ssa name defined by it. */
4924 name_info (data
, tgt
)->iv
->have_use_for
= false;
4928 /* If the variable is going to be preserved anyway, there is nothing to
4930 if (name_info (data
, tgt
)->preserve_biv
)
4933 /* Otherwise we just need to compute the iv. */
4934 rewrite_use_nonlinear_expr (data
, use
, cand
);
4937 /* Rewrites USE using candidate CAND. */
4940 rewrite_use (struct ivopts_data
*data
,
4941 struct iv_use
*use
, struct iv_cand
*cand
)
4945 case USE_NONLINEAR_EXPR
:
4946 rewrite_use_nonlinear_expr (data
, use
, cand
);
4950 rewrite_use_outer (data
, use
, cand
);
4954 rewrite_use_address (data
, use
, cand
);
4958 rewrite_use_compare (data
, use
, cand
);
4964 modify_stmt (use
->stmt
);
4967 /* Rewrite the uses using the selected induction variables. */
4970 rewrite_uses (struct ivopts_data
*data
)
4973 struct iv_cand
*cand
;
4976 for (i
= 0; i
< n_iv_uses (data
); i
++)
4978 use
= iv_use (data
, i
);
4979 cand
= use
->selected
;
4982 rewrite_use (data
, use
, cand
);
4986 /* Removes the ivs that are not used after rewriting. */
4989 remove_unused_ivs (struct ivopts_data
*data
)
4994 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4996 struct version_info
*info
;
4998 info
= ver_info (data
, j
);
5000 && !zero_p (info
->iv
->step
)
5002 && !info
->iv
->have_use_for
5003 && !info
->preserve_biv
)
5004 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5008 /* Frees data allocated by the optimization of a single loop. */
5011 free_loop_data (struct ivopts_data
*data
)
5016 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5018 struct version_info
*info
;
5020 info
= ver_info (data
, i
);
5024 info
->has_nonlin_use
= false;
5025 info
->preserve_biv
= false;
5028 bitmap_clear (data
->relevant
);
5029 bitmap_clear (data
->important_candidates
);
5031 for (i
= 0; i
< n_iv_uses (data
); i
++)
5033 struct iv_use
*use
= iv_use (data
, i
);
5036 BITMAP_XFREE (use
->related_cands
);
5037 for (j
= 0; j
< use
->n_map_members
; j
++)
5038 if (use
->cost_map
[j
].depends_on
)
5039 BITMAP_XFREE (use
->cost_map
[j
].depends_on
);
5040 free (use
->cost_map
);
5043 VARRAY_POP_ALL (data
->iv_uses
);
5045 for (i
= 0; i
< n_iv_cands (data
); i
++)
5047 struct iv_cand
*cand
= iv_cand (data
, i
);
5053 VARRAY_POP_ALL (data
->iv_candidates
);
5055 if (data
->version_info_size
< num_ssa_names
)
5057 data
->version_info_size
= 2 * num_ssa_names
;
5058 free (data
->version_info
);
5059 data
->version_info
= xcalloc (data
->version_info_size
,
5060 sizeof (struct version_info
));
5063 data
->max_inv_id
= 0;
5065 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (decl_rtl_to_reset
); i
++)
5067 tree obj
= VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset
, i
);
5069 SET_DECL_RTL (obj
, NULL_RTX
);
5071 VARRAY_POP_ALL (decl_rtl_to_reset
);
5074 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5078 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
5082 for (i
= 1; i
< loops
->num
; i
++)
5083 if (loops
->parray
[i
])
5085 free (loops
->parray
[i
]->aux
);
5086 loops
->parray
[i
]->aux
= NULL
;
5089 free_loop_data (data
);
5090 free (data
->version_info
);
5091 BITMAP_XFREE (data
->relevant
);
5092 BITMAP_XFREE (data
->important_candidates
);
5094 VARRAY_FREE (decl_rtl_to_reset
);
5095 VARRAY_FREE (data
->iv_uses
);
5096 VARRAY_FREE (data
->iv_candidates
);
5099 /* Optimizes the LOOP. Returns true if anything changed. */
5102 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5104 bool changed
= false;
5105 struct iv_ca
*iv_ca
;
5108 data
->current_loop
= loop
;
5110 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5112 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5114 exit
= single_dom_exit (loop
);
5117 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5118 exit
->src
->index
, exit
->dest
->index
);
5119 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5120 fprintf (dump_file
, "\n");
5123 fprintf (dump_file
, "\n");
5126 /* For each ssa name determines whether it behaves as an induction variable
5128 if (!find_induction_variables (data
))
5131 /* Finds interesting uses (item 1). */
5132 find_interesting_uses (data
);
5133 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5136 /* Finds candidates for the induction variables (item 2). */
5137 find_iv_candidates (data
);
5139 /* Calculates the costs (item 3, part 1). */
5140 determine_use_iv_costs (data
);
5141 determine_iv_costs (data
);
5142 determine_set_costs (data
);
5144 /* Find the optimal set of induction variables (item 3, part 2). */
5145 iv_ca
= find_optimal_iv_set (data
);
5150 /* Create the new induction variables (item 4, part 1). */
5151 create_new_ivs (data
, iv_ca
);
5152 iv_ca_free (&iv_ca
);
5154 /* Rewrite the uses (item 4, part 2). */
5155 rewrite_uses (data
);
5157 /* Remove the ivs that are unused after rewriting. */
5158 remove_unused_ivs (data
);
5160 loop_commit_inserts ();
5162 /* We have changed the structure of induction variables; it might happen
5163 that definitions in the scev database refer to some of them that were
5168 free_loop_data (data
);
5173 /* Main entry point. Optimizes induction variables in LOOPS. */
5176 tree_ssa_iv_optimize (struct loops
*loops
)
5179 struct ivopts_data data
;
5181 tree_ssa_iv_optimize_init (loops
, &data
);
5183 /* Optimize the loops starting with the innermost ones. */
5184 loop
= loops
->tree_root
;
5188 #ifdef ENABLE_CHECKING
5189 verify_loop_closed_ssa ();
5193 /* Scan the loops, inner ones first. */
5194 while (loop
!= loops
->tree_root
)
5196 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5197 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5199 tree_ssa_iv_optimize_loop (&data
, loop
);
5211 #ifdef ENABLE_CHECKING
5212 verify_loop_closed_ssa ();
5216 tree_ssa_iv_optimize_finalize (loops
, &data
);