1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
96 /* The infinite cost. */
97 #define INFTY 10000000
99 /* The expected number of loop iterations. TODO -- use profiling instead of
101 #define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base
; /* Initial value of the iv. */
108 tree base_object
; /* A memory object to that the induction variable points. */
109 tree step
; /* Step of the iv (constant only). */
110 tree ssa_name
; /* The ssa name with the value. */
111 bool biv_p
; /* Is it a biv? */
112 bool have_use_for
; /* Do we already have a use for it? */
113 unsigned use_id
; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name
; /* The ssa name. */
120 struct iv
*iv
; /* Induction variable description. */
121 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id
; /* Id of an invariant. */
124 bool preserve_biv
; /* For the original biv, whether to preserve it. */
130 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
131 USE_ADDRESS
, /* Use in an address. */
132 USE_COMPARE
/* Use is a compare. */
135 /* The candidate - cost pair. */
138 struct iv_cand
*cand
; /* The candidate. */
139 unsigned cost
; /* The cost. */
140 bitmap depends_on
; /* The list of invariants that have to be
142 tree value
; /* For final value elimination, the expression for
143 the final value of the iv. For iv elimination,
144 the new bound to compare with. */
150 unsigned id
; /* The id of the use. */
151 enum use_type type
; /* Type of the use. */
152 struct iv
*iv
; /* The induction variable it is based on. */
153 tree stmt
; /* Statement in that it occurs. */
154 tree
*op_p
; /* The place where it occurs. */
155 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
158 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
159 struct cost_pair
*cost_map
;
160 /* The costs wrto the iv candidates. */
162 struct iv_cand
*selected
;
163 /* The selected candidate. */
166 /* The position where the iv is computed. */
169 IP_NORMAL
, /* At the end, just before the exit condition. */
170 IP_END
, /* At the end of the latch block. */
171 IP_ORIGINAL
/* The original biv. */
174 /* The induction variable candidate. */
177 unsigned id
; /* The number of the candidate. */
178 bool important
; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos
; /* Where it is computed. */
181 tree incremented_at
; /* For original biv, the statement where it is
183 tree var_before
; /* The variable used for it before increment. */
184 tree var_after
; /* The variable used for it after increment. */
185 struct iv
*iv
; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost
; /* Cost of the candidate. */
190 bitmap depends_on
; /* The list of invariants that are used in step of the
194 /* The data used by the induction variable optimizations. */
196 typedef struct iv_use
*iv_use_p
;
198 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
200 typedef struct iv_cand
*iv_cand_p
;
201 DEF_VEC_P(iv_cand_p
);
202 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
206 /* The currently optimized loop. */
207 struct loop
*current_loop
;
209 /* Number of registers used in it. */
212 /* Numbers of iterations for all exits of the current loop. */
213 struct pointer_map_t
*niters
;
215 /* The size of version_info array allocated. */
216 unsigned version_info_size
;
218 /* The array of information for the ssa names. */
219 struct version_info
*version_info
;
221 /* The bitmap of indices in version_info whose value was changed. */
224 /* The maximum invariant id. */
227 /* The uses of induction variables. */
228 VEC(iv_use_p
,heap
) *iv_uses
;
230 /* The candidates. */
231 VEC(iv_cand_p
,heap
) *iv_candidates
;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates
;
236 /* Whether to consider just related and important candidates when replacing a
238 bool consider_all_candidates
;
241 /* An assignment of iv candidates to uses. */
245 /* The number of uses covered by the assignment. */
248 /* Number of uses that cannot be expressed by the candidates in the set. */
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair
**cand_for_use
;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses
;
257 /* The candidates used. */
260 /* The number of candidates in the set. */
263 /* Total number of registers needed. */
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost
;
269 /* Total cost of candidates. */
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses
;
275 /* Total cost of the assignment. */
279 /* Difference of two iv candidate assignments. */
286 /* An old assignment (for rollback purposes). */
287 struct cost_pair
*old_cp
;
289 /* A new assignment. */
290 struct cost_pair
*new_cp
;
292 /* Next change in the list. */
293 struct iv_ca_delta
*next_change
;
296 /* Bound on number of candidates below that all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
302 optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, try removing unnecessary
308 ivs from the set always. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for that the decl_rtl field must be reset is stored
316 static VEC(tree
,heap
) *decl_rtl_to_reset
;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data
*data
)
323 return VEC_length (iv_use_p
, data
->iv_uses
);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use
*
329 iv_use (struct ivopts_data
*data
, unsigned i
)
331 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data
*data
)
339 return VEC_length (iv_cand_p
, data
->iv_candidates
);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand
*
345 iv_cand (struct ivopts_data
*data
, unsigned i
)
347 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
350 /* The single loop exit if it dominates the latch, NULL otherwise. */
353 single_dom_exit (struct loop
*loop
)
355 edge exit
= single_exit (loop
);
360 if (!just_once_each_iteration_p (loop
, exit
->src
))
366 /* Dumps information about the induction variable IV to FILE. */
368 extern void dump_iv (FILE *, struct iv
*);
370 dump_iv (FILE *file
, struct iv
*iv
)
374 fprintf (file
, "ssa name ");
375 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
376 fprintf (file
, "\n");
379 fprintf (file
, " type ");
380 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
381 fprintf (file
, "\n");
385 fprintf (file
, " base ");
386 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
387 fprintf (file
, "\n");
389 fprintf (file
, " step ");
390 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
391 fprintf (file
, "\n");
395 fprintf (file
, " invariant ");
396 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
397 fprintf (file
, "\n");
402 fprintf (file
, " base object ");
403 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
404 fprintf (file
, "\n");
408 fprintf (file
, " is a biv\n");
411 /* Dumps information about the USE to FILE. */
413 extern void dump_use (FILE *, struct iv_use
*);
415 dump_use (FILE *file
, struct iv_use
*use
)
417 fprintf (file
, "use %d\n", use
->id
);
421 case USE_NONLINEAR_EXPR
:
422 fprintf (file
, " generic\n");
426 fprintf (file
, " address\n");
430 fprintf (file
, " compare\n");
437 fprintf (file
, " in statement ");
438 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
439 fprintf (file
, "\n");
441 fprintf (file
, " at position ");
443 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
444 fprintf (file
, "\n");
446 dump_iv (file
, use
->iv
);
448 if (use
->related_cands
)
450 fprintf (file
, " related candidates ");
451 dump_bitmap (file
, use
->related_cands
);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data
*);
459 dump_uses (FILE *file
, struct ivopts_data
*data
)
464 for (i
= 0; i
< n_iv_uses (data
); i
++)
466 use
= iv_use (data
, i
);
468 dump_use (file
, use
);
469 fprintf (file
, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
475 extern void dump_cand (FILE *, struct iv_cand
*);
477 dump_cand (FILE *file
, struct iv_cand
*cand
)
479 struct iv
*iv
= cand
->iv
;
481 fprintf (file
, "candidate %d%s\n",
482 cand
->id
, cand
->important
? " (important)" : "");
484 if (cand
->depends_on
)
486 fprintf (file
, " depends on ");
487 dump_bitmap (file
, cand
->depends_on
);
492 fprintf (file
, " final value replacement\n");
499 fprintf (file
, " incremented before exit test\n");
503 fprintf (file
, " incremented at end\n");
507 fprintf (file
, " original biv\n");
514 /* Returns the info for ssa version VER. */
516 static inline struct version_info
*
517 ver_info (struct ivopts_data
*data
, unsigned ver
)
519 return data
->version_info
+ ver
;
522 /* Returns the info for ssa name NAME. */
524 static inline struct version_info
*
525 name_info (struct ivopts_data
*data
, tree name
)
527 return ver_info (data
, SSA_NAME_VERSION (name
));
530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
534 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
536 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
540 if (sbb
== loop
->latch
)
546 return stmt
== last_stmt (bb
);
549 /* Returns true if STMT if after the place where the original induction
550 variable CAND is incremented. */
553 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
555 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
556 basic_block stmt_bb
= bb_for_stmt (stmt
);
557 block_stmt_iterator bsi
;
559 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
562 if (stmt_bb
!= cand_bb
)
565 /* Scan the block from the end, since the original ivs are usually
566 incremented at the end of the loop body. */
567 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
569 if (bsi_stmt (bsi
) == cand
->incremented_at
)
571 if (bsi_stmt (bsi
) == stmt
)
576 /* Returns true if STMT if after the place where the induction variable
577 CAND is incremented in LOOP. */
580 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
588 return stmt_after_ip_normal_pos (loop
, stmt
);
591 return stmt_after_ip_original_pos (cand
, stmt
);
598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
601 abnormal_ssa_name_p (tree exp
)
606 if (TREE_CODE (exp
) != SSA_NAME
)
609 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
613 abnormal phi node. Callback for for_each_index. */
616 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
617 void *data ATTRIBUTE_UNUSED
)
619 if (TREE_CODE (base
) == ARRAY_REF
)
621 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
623 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
627 return !abnormal_ssa_name_p (*index
);
630 /* Returns true if EXPR contains a ssa name that occurs in an
631 abnormal phi node. */
634 contains_abnormal_ssa_name_p (tree expr
)
637 enum tree_code_class codeclass
;
642 code
= TREE_CODE (expr
);
643 codeclass
= TREE_CODE_CLASS (code
);
645 if (code
== SSA_NAME
)
646 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
648 if (code
== INTEGER_CST
649 || is_gimple_min_invariant (expr
))
652 if (code
== ADDR_EXPR
)
653 return !for_each_index (&TREE_OPERAND (expr
, 0),
654 idx_contains_abnormal_ssa_name_p
,
661 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
666 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
678 /* Returns tree describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
682 niter_for_exit (struct ivopts_data
*data
, edge exit
)
684 struct tree_niter_desc desc
;
690 data
->niters
= pointer_map_create ();
694 slot
= pointer_map_contains (data
->niters
, exit
);
698 /* Try to determine number of iterations. We must know it
699 unconditionally (i.e., without possibility of # of iterations
700 being zero). Also, we cannot safely work with ssa names that
701 appear in phi nodes on abnormal edges, so that we do not create
702 overlapping life ranges for them (PR 27283). */
703 if (number_of_iterations_exit (data
->current_loop
,
705 && integer_zerop (desc
.may_be_zero
)
706 && !contains_abnormal_ssa_name_p (desc
.niter
))
711 *pointer_map_insert (data
->niters
, exit
) = niter
;
714 niter
= (tree
) *slot
;
719 /* Returns tree describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
724 niter_for_single_dom_exit (struct ivopts_data
*data
)
726 edge exit
= single_dom_exit (data
->current_loop
);
731 return niter_for_exit (data
, exit
);
734 /* Initializes data structures used by the iv optimization pass, stored
738 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
740 data
->version_info_size
= 2 * num_ssa_names
;
741 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
742 data
->relevant
= BITMAP_ALLOC (NULL
);
743 data
->important_candidates
= BITMAP_ALLOC (NULL
);
744 data
->max_inv_id
= 0;
746 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
747 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
748 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
751 /* Returns a memory object to that EXPR points. In case we are able to
752 determine that it does not point to any such object, NULL is returned. */
755 determine_base_object (tree expr
)
757 enum tree_code code
= TREE_CODE (expr
);
760 /* If this is a pointer casted to any type, we need to determine
761 the base object for the pointer; so handle conversions before
762 throwing away non-pointer expressions. */
763 if (TREE_CODE (expr
) == NOP_EXPR
764 || TREE_CODE (expr
) == CONVERT_EXPR
)
765 return determine_base_object (TREE_OPERAND (expr
, 0));
767 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
776 obj
= TREE_OPERAND (expr
, 0);
777 base
= get_base_address (obj
);
782 if (TREE_CODE (base
) == INDIRECT_REF
)
783 return determine_base_object (TREE_OPERAND (base
, 0));
785 return fold_convert (ptr_type_node
,
786 build_fold_addr_expr (base
));
788 case POINTER_PLUS_EXPR
:
789 return determine_base_object (TREE_OPERAND (expr
, 0));
793 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
797 return fold_convert (ptr_type_node
, expr
);
801 /* Allocates an induction variable with given initial value BASE and step STEP
805 alloc_iv (tree base
, tree step
)
807 struct iv
*iv
= XCNEW (struct iv
);
808 gcc_assert (step
!= NULL_TREE
);
811 iv
->base_object
= determine_base_object (base
);
814 iv
->have_use_for
= false;
816 iv
->ssa_name
= NULL_TREE
;
821 /* Sets STEP and BASE for induction variable IV. */
824 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
826 struct version_info
*info
= name_info (data
, iv
);
828 gcc_assert (!info
->iv
);
830 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
831 info
->iv
= alloc_iv (base
, step
);
832 info
->iv
->ssa_name
= iv
;
835 /* Finds induction variable declaration for VAR. */
838 get_iv (struct ivopts_data
*data
, tree var
)
841 tree type
= TREE_TYPE (var
);
843 if (!POINTER_TYPE_P (type
)
844 && !INTEGRAL_TYPE_P (type
))
847 if (!name_info (data
, var
)->iv
)
849 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
852 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
853 set_iv (data
, var
, var
, build_int_cst (type
, 0));
856 return name_info (data
, var
)->iv
;
859 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
860 not define a simple affine biv with nonzero step. */
863 determine_biv_step (tree phi
)
865 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
866 tree name
= PHI_RESULT (phi
);
869 if (!is_gimple_reg (name
))
872 if (!simple_iv (loop
, phi
, name
, &iv
, true))
875 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
878 /* Finds basic ivs. */
881 find_bivs (struct ivopts_data
*data
)
883 tree phi
, step
, type
, base
;
885 struct loop
*loop
= data
->current_loop
;
887 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
889 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
892 step
= determine_biv_step (phi
);
896 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
897 base
= expand_simple_operations (base
);
898 if (contains_abnormal_ssa_name_p (base
)
899 || contains_abnormal_ssa_name_p (step
))
902 type
= TREE_TYPE (PHI_RESULT (phi
));
903 base
= fold_convert (type
, base
);
905 step
= fold_convert (type
, step
);
907 set_iv (data
, PHI_RESULT (phi
), base
, step
);
914 /* Marks basic ivs. */
917 mark_bivs (struct ivopts_data
*data
)
920 struct iv
*iv
, *incr_iv
;
921 struct loop
*loop
= data
->current_loop
;
924 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
926 iv
= get_iv (data
, PHI_RESULT (phi
));
930 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
931 incr_iv
= get_iv (data
, var
);
935 /* If the increment is in the subloop, ignore it. */
936 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
937 if (incr_bb
->loop_father
!= data
->current_loop
938 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
942 incr_iv
->biv_p
= true;
946 /* Checks whether STMT defines a linear induction variable and stores its
950 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
953 struct loop
*loop
= data
->current_loop
;
955 iv
->base
= NULL_TREE
;
956 iv
->step
= NULL_TREE
;
958 if (TREE_CODE (stmt
) != GIMPLE_MODIFY_STMT
)
961 lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
962 if (TREE_CODE (lhs
) != SSA_NAME
)
965 if (!simple_iv (loop
, stmt
, GIMPLE_STMT_OPERAND (stmt
, 1), iv
, true))
967 iv
->base
= expand_simple_operations (iv
->base
);
969 if (contains_abnormal_ssa_name_p (iv
->base
)
970 || contains_abnormal_ssa_name_p (iv
->step
))
976 /* Finds general ivs in statement STMT. */
979 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
983 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
986 set_iv (data
, GIMPLE_STMT_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
989 /* Finds general ivs in basic block BB. */
992 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
994 block_stmt_iterator bsi
;
996 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
997 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1000 /* Finds general ivs. */
1003 find_givs (struct ivopts_data
*data
)
1005 struct loop
*loop
= data
->current_loop
;
1006 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1009 for (i
= 0; i
< loop
->num_nodes
; i
++)
1010 find_givs_in_bb (data
, body
[i
]);
1014 /* For each ssa name defined in LOOP determines whether it is an induction
1015 variable and if so, its initial value and step. */
1018 find_induction_variables (struct ivopts_data
*data
)
1023 if (!find_bivs (data
))
1029 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1031 tree niter
= niter_for_single_dom_exit (data
);
1035 fprintf (dump_file
, " number of iterations ");
1036 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1037 fprintf (dump_file
, "\n\n");
1040 fprintf (dump_file
, "Induction variables:\n\n");
1042 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1044 if (ver_info (data
, i
)->iv
)
1045 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1052 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1054 static struct iv_use
*
1055 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1056 tree stmt
, enum use_type use_type
)
1058 struct iv_use
*use
= XCNEW (struct iv_use
);
1060 use
->id
= n_iv_uses (data
);
1061 use
->type
= use_type
;
1065 use
->related_cands
= BITMAP_ALLOC (NULL
);
1067 /* To avoid showing ssa name in the dumps, if it was not reset by the
1069 iv
->ssa_name
= NULL_TREE
;
1071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1072 dump_use (dump_file
, use
);
1074 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1079 /* Checks whether OP is a loop-level invariant and if so, records it.
1080 NONLINEAR_USE is true if the invariant is used in a way we do not
1081 handle specially. */
1084 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1087 struct version_info
*info
;
1089 if (TREE_CODE (op
) != SSA_NAME
1090 || !is_gimple_reg (op
))
1093 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1095 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1098 info
= name_info (data
, op
);
1100 info
->has_nonlin_use
|= nonlinear_use
;
1102 info
->inv_id
= ++data
->max_inv_id
;
1103 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1106 /* Checks whether the use OP is interesting and if so, records it. */
1108 static struct iv_use
*
1109 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1116 if (TREE_CODE (op
) != SSA_NAME
)
1119 iv
= get_iv (data
, op
);
1123 if (iv
->have_use_for
)
1125 use
= iv_use (data
, iv
->use_id
);
1127 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1131 if (integer_zerop (iv
->step
))
1133 record_invariant (data
, op
, true);
1136 iv
->have_use_for
= true;
1138 civ
= XNEW (struct iv
);
1141 stmt
= SSA_NAME_DEF_STMT (op
);
1142 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1143 || TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
);
1145 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1146 iv
->use_id
= use
->id
;
1151 /* Given a condition *COND_P, checks whether it is a compare of an induction
1152 variable and an invariant. If this is the case, CONTROL_VAR is set
1153 to location of the iv, BOUND to the location of the invariant,
1154 IV_VAR and IV_BOUND are set to the corresponding induction variable
1155 descriptions, and true is returned. If this is not the case,
1156 CONTROL_VAR and BOUND are set to the arguments of the condition and
1157 false is returned. */
1160 extract_cond_operands (struct ivopts_data
*data
, tree
*cond_p
,
1161 tree
**control_var
, tree
**bound
,
1162 struct iv
**iv_var
, struct iv
**iv_bound
)
1164 /* The nodes returned when COND has just one operand. Note that you should
1165 not modify anything in BOUND or IV_BOUND because of this. */
1166 static struct iv const_iv
;
1168 tree cond
= *cond_p
;
1169 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1170 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1173 zero
= integer_zero_node
;
1174 const_iv
.step
= integer_zero_node
;
1176 if (TREE_CODE (cond
) == SSA_NAME
)
1179 iv0
= get_iv (data
, cond
);
1180 ret
= (iv0
&& !integer_zerop (iv0
->step
));
1184 if (!COMPARISON_CLASS_P (cond
))
1190 op0
= &TREE_OPERAND (cond
, 0);
1191 op1
= &TREE_OPERAND (cond
, 1);
1192 if (TREE_CODE (*op0
) == SSA_NAME
)
1193 iv0
= get_iv (data
, *op0
);
1194 if (TREE_CODE (*op1
) == SSA_NAME
)
1195 iv1
= get_iv (data
, *op1
);
1197 /* Exactly one of the compared values must be an iv, and the other one must
1202 if (integer_zerop (iv0
->step
))
1204 /* Control variable may be on the other side. */
1205 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1206 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1208 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1212 *control_var
= op0
;;
1223 /* Checks whether the condition *COND_P in STMT is interesting
1224 and if so, records it. */
1227 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1229 tree
*var_p
, *bound_p
;
1230 struct iv
*var_iv
, *civ
;
1232 if (!extract_cond_operands (data
, cond_p
, &var_p
, &bound_p
, &var_iv
, NULL
))
1234 find_interesting_uses_op (data
, *var_p
);
1235 find_interesting_uses_op (data
, *bound_p
);
1239 civ
= XNEW (struct iv
);
1241 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1244 /* Returns true if expression EXPR is obviously invariant in LOOP,
1245 i.e. if all its operands are defined outside of the LOOP. */
1248 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1253 if (is_gimple_min_invariant (expr
))
1256 if (TREE_CODE (expr
) == SSA_NAME
)
1258 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1260 && flow_bb_inside_loop_p (loop
, def_bb
))
1266 if (!EXPR_P (expr
) && !GIMPLE_STMT_P (expr
))
1269 len
= TREE_OPERAND_LENGTH (expr
);
1270 for (i
= 0; i
< len
; i
++)
1271 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1277 /* Cumulates the steps of indices into DATA and replaces their values with the
1278 initial ones. Returns false when the value of the index cannot be determined.
1279 Callback for for_each_index. */
1281 struct ifs_ivopts_data
1283 struct ivopts_data
*ivopts_data
;
1289 idx_find_step (tree base
, tree
*idx
, void *data
)
1291 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1293 tree step
, iv_base
, iv_step
, lbound
, off
;
1294 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1296 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1297 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1300 /* If base is a component ref, require that the offset of the reference
1302 if (TREE_CODE (base
) == COMPONENT_REF
)
1304 off
= component_ref_field_offset (base
);
1305 return expr_invariant_in_loop_p (loop
, off
);
1308 /* If base is array, first check whether we will be able to move the
1309 reference out of the loop (in order to take its address in strength
1310 reduction). In order for this to work we need both lower bound
1311 and step to be loop invariants. */
1312 if (TREE_CODE (base
) == ARRAY_REF
)
1314 step
= array_ref_element_size (base
);
1315 lbound
= array_ref_low_bound (base
);
1317 if (!expr_invariant_in_loop_p (loop
, step
)
1318 || !expr_invariant_in_loop_p (loop
, lbound
))
1322 if (TREE_CODE (*idx
) != SSA_NAME
)
1325 iv
= get_iv (dta
->ivopts_data
, *idx
);
1329 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1330 *&x[0], which is not folded and does not trigger the
1331 ARRAY_REF path below. */
1334 if (integer_zerop (iv
->step
))
1337 if (TREE_CODE (base
) == ARRAY_REF
)
1339 step
= array_ref_element_size (base
);
1341 /* We only handle addresses whose step is an integer constant. */
1342 if (TREE_CODE (step
) != INTEGER_CST
)
1346 /* The step for pointer arithmetics already is 1 byte. */
1347 step
= build_int_cst (sizetype
, 1);
1351 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1352 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1355 /* The index might wrap. */
1359 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1360 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1365 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1366 object is passed to it in DATA. */
1369 idx_record_use (tree base
, tree
*idx
,
1372 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1373 find_interesting_uses_op (data
, *idx
);
1374 if (TREE_CODE (base
) == ARRAY_REF
)
1376 find_interesting_uses_op (data
, array_ref_element_size (base
));
1377 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1382 /* Returns true if memory reference REF may be unaligned. */
1385 may_be_unaligned_p (tree ref
)
1389 HOST_WIDE_INT bitsize
;
1390 HOST_WIDE_INT bitpos
;
1392 enum machine_mode mode
;
1393 int unsignedp
, volatilep
;
1394 unsigned base_align
;
1396 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1397 thus they are not misaligned. */
1398 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1401 /* The test below is basically copy of what expr.c:normal_inner_ref
1402 does to check whether the object must be loaded by parts when
1403 STRICT_ALIGNMENT is true. */
1404 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1405 &unsignedp
, &volatilep
, true);
1406 base_type
= TREE_TYPE (base
);
1407 base_align
= TYPE_ALIGN (base_type
);
1410 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1411 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1412 || bitpos
% BITS_PER_UNIT
!= 0))
1418 /* Return true if EXPR may be non-addressable. */
1421 may_be_nonaddressable_p (tree expr
)
1423 switch (TREE_CODE (expr
))
1426 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1427 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1430 case ARRAY_RANGE_REF
:
1431 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1433 case VIEW_CONVERT_EXPR
:
1434 /* This kind of view-conversions may wrap non-addressable objects
1435 and make them look addressable. After some processing the
1436 non-addressability may be uncovered again, causing ADDR_EXPRs
1437 of inappropriate objects to be built. */
1438 return AGGREGATE_TYPE_P (TREE_TYPE (expr
))
1439 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr
, 0)));
1448 /* Finds addresses in *OP_P inside STMT. */
1451 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1453 tree base
= *op_p
, step
= build_int_cst (sizetype
, 0);
1455 struct ifs_ivopts_data ifs_ivopts_data
;
1457 /* Do not play with volatile memory references. A bit too conservative,
1458 perhaps, but safe. */
1459 if (stmt_ann (stmt
)->has_volatile_ops
)
1462 /* Ignore bitfields for now. Not really something terribly complicated
1464 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1467 if (may_be_nonaddressable_p (base
))
1470 if (STRICT_ALIGNMENT
1471 && may_be_unaligned_p (base
))
1474 base
= unshare_expr (base
);
1476 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1478 tree type
= build_pointer_type (TREE_TYPE (base
));
1482 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1484 civ
= get_iv (data
, TMR_BASE (base
));
1488 TMR_BASE (base
) = civ
->base
;
1491 if (TMR_INDEX (base
)
1492 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1494 civ
= get_iv (data
, TMR_INDEX (base
));
1498 TMR_INDEX (base
) = civ
->base
;
1503 if (TMR_STEP (base
))
1504 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1506 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1510 if (integer_zerop (step
))
1512 base
= tree_mem_ref_addr (type
, base
);
1516 ifs_ivopts_data
.ivopts_data
= data
;
1517 ifs_ivopts_data
.stmt
= stmt
;
1518 ifs_ivopts_data
.step
= build_int_cst (sizetype
, 0);
1519 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1520 || integer_zerop (ifs_ivopts_data
.step
))
1522 step
= ifs_ivopts_data
.step
;
1524 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1525 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1527 base
= build_fold_addr_expr (base
);
1529 /* Substituting bases of IVs into the base expression might
1530 have caused folding opportunities. */
1531 if (TREE_CODE (base
) == ADDR_EXPR
)
1533 tree
*ref
= &TREE_OPERAND (base
, 0);
1534 while (handled_component_p (*ref
))
1535 ref
= &TREE_OPERAND (*ref
, 0);
1536 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1537 *ref
= fold_indirect_ref (*ref
);
1541 civ
= alloc_iv (base
, step
);
1542 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1546 for_each_index (op_p
, idx_record_use
, data
);
1549 /* Finds and records invariants used in STMT. */
1552 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1555 use_operand_p use_p
;
1558 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1560 op
= USE_FROM_PTR (use_p
);
1561 record_invariant (data
, op
, false);
1565 /* Finds interesting uses of induction variables in the statement STMT. */
1568 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1573 use_operand_p use_p
;
1575 find_invariants_stmt (data
, stmt
);
1577 if (TREE_CODE (stmt
) == COND_EXPR
)
1579 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1583 if (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
)
1585 lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
1586 rhs
= GIMPLE_STMT_OPERAND (stmt
, 1);
1588 if (TREE_CODE (lhs
) == SSA_NAME
)
1590 /* If the statement defines an induction variable, the uses are not
1591 interesting by themselves. */
1593 iv
= get_iv (data
, lhs
);
1595 if (iv
&& !integer_zerop (iv
->step
))
1599 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1601 case tcc_comparison
:
1602 find_interesting_uses_cond (data
, stmt
,
1603 &GIMPLE_STMT_OPERAND (stmt
, 1));
1607 find_interesting_uses_address (data
, stmt
,
1608 &GIMPLE_STMT_OPERAND (stmt
, 1));
1609 if (REFERENCE_CLASS_P (lhs
))
1610 find_interesting_uses_address (data
, stmt
,
1611 &GIMPLE_STMT_OPERAND (stmt
, 0));
1617 if (REFERENCE_CLASS_P (lhs
)
1618 && is_gimple_val (rhs
))
1620 find_interesting_uses_address (data
, stmt
,
1621 &GIMPLE_STMT_OPERAND (stmt
, 0));
1622 find_interesting_uses_op (data
, rhs
);
1626 /* TODO -- we should also handle address uses of type
1628 memory = call (whatever);
1635 if (TREE_CODE (stmt
) == PHI_NODE
1636 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1638 lhs
= PHI_RESULT (stmt
);
1639 iv
= get_iv (data
, lhs
);
1641 if (iv
&& !integer_zerop (iv
->step
))
1645 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1647 op
= USE_FROM_PTR (use_p
);
1649 if (TREE_CODE (op
) != SSA_NAME
)
1652 iv
= get_iv (data
, op
);
1656 find_interesting_uses_op (data
, op
);
1660 /* Finds interesting uses of induction variables outside of loops
1661 on loop exit edge EXIT. */
1664 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1668 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1670 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1671 if (is_gimple_reg (def
))
1672 find_interesting_uses_op (data
, def
);
1676 /* Finds uses of the induction variables that are interesting. */
1679 find_interesting_uses (struct ivopts_data
*data
)
1682 block_stmt_iterator bsi
;
1684 basic_block
*body
= get_loop_body (data
->current_loop
);
1686 struct version_info
*info
;
1689 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1690 fprintf (dump_file
, "Uses:\n\n");
1692 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1697 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1698 if (e
->dest
!= EXIT_BLOCK_PTR
1699 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1700 find_interesting_uses_outside (data
, e
);
1702 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1703 find_interesting_uses_stmt (data
, phi
);
1704 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1705 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1708 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1712 fprintf (dump_file
, "\n");
1714 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1716 info
= ver_info (data
, i
);
1719 fprintf (dump_file
, " ");
1720 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1721 fprintf (dump_file
, " is invariant (%d)%s\n",
1722 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1726 fprintf (dump_file
, "\n");
1732 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1733 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1734 we are at the top-level of the processed address. */
1737 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1738 unsigned HOST_WIDE_INT
*offset
)
1740 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1741 enum tree_code code
;
1742 tree type
, orig_type
= TREE_TYPE (expr
);
1743 unsigned HOST_WIDE_INT off0
, off1
, st
;
1744 tree orig_expr
= expr
;
1748 type
= TREE_TYPE (expr
);
1749 code
= TREE_CODE (expr
);
1755 if (!cst_and_fits_in_hwi (expr
)
1756 || integer_zerop (expr
))
1759 *offset
= int_cst_value (expr
);
1760 return build_int_cst (orig_type
, 0);
1764 op0
= TREE_OPERAND (expr
, 0);
1765 op1
= TREE_OPERAND (expr
, 1);
1767 op0
= strip_offset_1 (op0
, false, false, &off0
);
1768 op1
= strip_offset_1 (op1
, false, false, &off1
);
1770 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1771 if (op0
== TREE_OPERAND (expr
, 0)
1772 && op1
== TREE_OPERAND (expr
, 1))
1775 if (integer_zerop (op1
))
1777 else if (integer_zerop (op0
))
1779 if (code
== PLUS_EXPR
)
1782 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1785 expr
= fold_build2 (code
, type
, op0
, op1
);
1787 return fold_convert (orig_type
, expr
);
1793 step
= array_ref_element_size (expr
);
1794 if (!cst_and_fits_in_hwi (step
))
1797 st
= int_cst_value (step
);
1798 op1
= TREE_OPERAND (expr
, 1);
1799 op1
= strip_offset_1 (op1
, false, false, &off1
);
1800 *offset
= off1
* st
;
1803 && integer_zerop (op1
))
1805 /* Strip the component reference completely. */
1806 op0
= TREE_OPERAND (expr
, 0);
1807 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1817 tmp
= component_ref_field_offset (expr
);
1819 && cst_and_fits_in_hwi (tmp
))
1821 /* Strip the component reference completely. */
1822 op0
= TREE_OPERAND (expr
, 0);
1823 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1824 *offset
= off0
+ int_cst_value (tmp
);
1830 op0
= TREE_OPERAND (expr
, 0);
1831 op0
= strip_offset_1 (op0
, true, true, &off0
);
1834 if (op0
== TREE_OPERAND (expr
, 0))
1837 expr
= build_fold_addr_expr (op0
);
1838 return fold_convert (orig_type
, expr
);
1841 inside_addr
= false;
1848 /* Default handling of expressions for that we want to recurse into
1849 the first operand. */
1850 op0
= TREE_OPERAND (expr
, 0);
1851 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1854 if (op0
== TREE_OPERAND (expr
, 0)
1855 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1858 expr
= copy_node (expr
);
1859 TREE_OPERAND (expr
, 0) = op0
;
1861 TREE_OPERAND (expr
, 1) = op1
;
1863 /* Inside address, we might strip the top level component references,
1864 thus changing type of the expression. Handling of ADDR_EXPR
1866 expr
= fold_convert (orig_type
, expr
);
1871 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1874 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1876 return strip_offset_1 (expr
, false, false, offset
);
1879 /* Returns variant of TYPE that can be used as base for different uses.
1880 We return unsigned type with the same precision, which avoids problems
1884 generic_type_for (tree type
)
1886 if (POINTER_TYPE_P (type
))
1887 return unsigned_type_for (type
);
1889 if (TYPE_UNSIGNED (type
))
1892 return unsigned_type_for (type
);
1895 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1896 the bitmap to that we should store it. */
1898 static struct ivopts_data
*fd_ivopts_data
;
1900 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1902 bitmap
*depends_on
= (bitmap
*) data
;
1903 struct version_info
*info
;
1905 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1907 info
= name_info (fd_ivopts_data
, *expr_p
);
1909 if (!info
->inv_id
|| info
->has_nonlin_use
)
1913 *depends_on
= BITMAP_ALLOC (NULL
);
1914 bitmap_set_bit (*depends_on
, info
->inv_id
);
1919 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1920 position to POS. If USE is not NULL, the candidate is set as related to
1921 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1922 replacement of the final value of the iv by a direct computation. */
1924 static struct iv_cand
*
1925 add_candidate_1 (struct ivopts_data
*data
,
1926 tree base
, tree step
, bool important
, enum iv_position pos
,
1927 struct iv_use
*use
, tree incremented_at
)
1930 struct iv_cand
*cand
= NULL
;
1931 tree type
, orig_type
;
1935 orig_type
= TREE_TYPE (base
);
1936 type
= generic_type_for (orig_type
);
1937 if (type
!= orig_type
)
1939 base
= fold_convert (type
, base
);
1940 step
= fold_convert (type
, step
);
1944 for (i
= 0; i
< n_iv_cands (data
); i
++)
1946 cand
= iv_cand (data
, i
);
1948 if (cand
->pos
!= pos
)
1951 if (cand
->incremented_at
!= incremented_at
)
1965 if (operand_equal_p (base
, cand
->iv
->base
, 0)
1966 && operand_equal_p (step
, cand
->iv
->step
, 0))
1970 if (i
== n_iv_cands (data
))
1972 cand
= XCNEW (struct iv_cand
);
1978 cand
->iv
= alloc_iv (base
, step
);
1981 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1983 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
1984 cand
->var_after
= cand
->var_before
;
1986 cand
->important
= important
;
1987 cand
->incremented_at
= incremented_at
;
1988 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
1991 && TREE_CODE (step
) != INTEGER_CST
)
1993 fd_ivopts_data
= data
;
1994 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
1997 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1998 dump_cand (dump_file
, cand
);
2001 if (important
&& !cand
->important
)
2003 cand
->important
= true;
2004 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2005 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2010 bitmap_set_bit (use
->related_cands
, i
);
2011 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2012 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2019 /* Returns true if incrementing the induction variable at the end of the LOOP
2022 The purpose is to avoid splitting latch edge with a biv increment, thus
2023 creating a jump, possibly confusing other optimization passes and leaving
2024 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2025 is not available (so we do not have a better alternative), or if the latch
2026 edge is already nonempty. */
2029 allow_ip_end_pos_p (struct loop
*loop
)
2031 if (!ip_normal_pos (loop
))
2034 if (!empty_block_p (ip_end_pos (loop
)))
2040 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2041 position to POS. If USE is not NULL, the candidate is set as related to
2042 it. The candidate computation is scheduled on all available positions. */
2045 add_candidate (struct ivopts_data
*data
,
2046 tree base
, tree step
, bool important
, struct iv_use
*use
)
2048 if (ip_normal_pos (data
->current_loop
))
2049 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2050 if (ip_end_pos (data
->current_loop
)
2051 && allow_ip_end_pos_p (data
->current_loop
))
2052 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2055 /* Add a standard "0 + 1 * iteration" iv candidate for a
2056 type with SIZE bits. */
2059 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2062 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2063 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2067 /* Adds standard iv candidates. */
2070 add_standard_iv_candidates (struct ivopts_data
*data
)
2072 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2074 /* The same for a double-integer type if it is still fast enough. */
2075 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2076 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2080 /* Adds candidates bases on the old induction variable IV. */
2083 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2086 struct iv_cand
*cand
;
2088 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2090 /* The same, but with initial value zero. */
2091 add_candidate (data
,
2092 build_int_cst (TREE_TYPE (iv
->base
), 0),
2093 iv
->step
, true, NULL
);
2095 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2096 if (TREE_CODE (phi
) == PHI_NODE
)
2098 /* Additionally record the possibility of leaving the original iv
2100 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2101 cand
= add_candidate_1 (data
,
2102 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2103 SSA_NAME_DEF_STMT (def
));
2104 cand
->var_before
= iv
->ssa_name
;
2105 cand
->var_after
= def
;
2109 /* Adds candidates based on the old induction variables. */
2112 add_old_ivs_candidates (struct ivopts_data
*data
)
2118 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2120 iv
= ver_info (data
, i
)->iv
;
2121 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2122 add_old_iv_candidates (data
, iv
);
2126 /* Adds candidates based on the value of the induction variable IV and USE. */
2129 add_iv_value_candidates (struct ivopts_data
*data
,
2130 struct iv
*iv
, struct iv_use
*use
)
2132 unsigned HOST_WIDE_INT offset
;
2135 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2137 /* The same, but with initial value zero. Make such variable important,
2138 since it is generic enough so that possibly many uses may be based
2140 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2141 iv
->step
, true, use
);
2143 /* Third, try removing the constant offset. */
2144 base
= strip_offset (iv
->base
, &offset
);
2146 add_candidate (data
, base
, iv
->step
, false, use
);
2149 /* Adds candidates based on the uses. */
2152 add_derived_ivs_candidates (struct ivopts_data
*data
)
2156 for (i
= 0; i
< n_iv_uses (data
); i
++)
2158 struct iv_use
*use
= iv_use (data
, i
);
2165 case USE_NONLINEAR_EXPR
:
2168 /* Just add the ivs based on the value of the iv used here. */
2169 add_iv_value_candidates (data
, use
->iv
, use
);
2178 /* Record important candidates and add them to related_cands bitmaps
2182 record_important_candidates (struct ivopts_data
*data
)
2187 for (i
= 0; i
< n_iv_cands (data
); i
++)
2189 struct iv_cand
*cand
= iv_cand (data
, i
);
2191 if (cand
->important
)
2192 bitmap_set_bit (data
->important_candidates
, i
);
2195 data
->consider_all_candidates
= (n_iv_cands (data
)
2196 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2198 if (data
->consider_all_candidates
)
2200 /* We will not need "related_cands" bitmaps in this case,
2201 so release them to decrease peak memory consumption. */
2202 for (i
= 0; i
< n_iv_uses (data
); i
++)
2204 use
= iv_use (data
, i
);
2205 BITMAP_FREE (use
->related_cands
);
2210 /* Add important candidates to the related_cands bitmaps. */
2211 for (i
= 0; i
< n_iv_uses (data
); i
++)
2212 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2213 data
->important_candidates
);
2217 /* Finds the candidates for the induction variables. */
2220 find_iv_candidates (struct ivopts_data
*data
)
2222 /* Add commonly used ivs. */
2223 add_standard_iv_candidates (data
);
2225 /* Add old induction variables. */
2226 add_old_ivs_candidates (data
);
2228 /* Add induction variables derived from uses. */
2229 add_derived_ivs_candidates (data
);
2231 /* Record the important candidates. */
2232 record_important_candidates (data
);
2235 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2236 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2237 we allocate a simple list to every use. */
2240 alloc_use_cost_map (struct ivopts_data
*data
)
2242 unsigned i
, size
, s
, j
;
2244 for (i
= 0; i
< n_iv_uses (data
); i
++)
2246 struct iv_use
*use
= iv_use (data
, i
);
2249 if (data
->consider_all_candidates
)
2250 size
= n_iv_cands (data
);
2254 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2259 /* Round up to the power of two, so that moduling by it is fast. */
2260 for (size
= 1; size
< s
; size
<<= 1)
2264 use
->n_map_members
= size
;
2265 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2269 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2270 on invariants DEPENDS_ON and that the value used in expressing it
2274 set_use_iv_cost (struct ivopts_data
*data
,
2275 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2276 bitmap depends_on
, tree value
)
2282 BITMAP_FREE (depends_on
);
2286 if (data
->consider_all_candidates
)
2288 use
->cost_map
[cand
->id
].cand
= cand
;
2289 use
->cost_map
[cand
->id
].cost
= cost
;
2290 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2291 use
->cost_map
[cand
->id
].value
= value
;
2295 /* n_map_members is a power of two, so this computes modulo. */
2296 s
= cand
->id
& (use
->n_map_members
- 1);
2297 for (i
= s
; i
< use
->n_map_members
; i
++)
2298 if (!use
->cost_map
[i
].cand
)
2300 for (i
= 0; i
< s
; i
++)
2301 if (!use
->cost_map
[i
].cand
)
2307 use
->cost_map
[i
].cand
= cand
;
2308 use
->cost_map
[i
].cost
= cost
;
2309 use
->cost_map
[i
].depends_on
= depends_on
;
2310 use
->cost_map
[i
].value
= value
;
2313 /* Gets cost of (USE, CANDIDATE) pair. */
2315 static struct cost_pair
*
2316 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2317 struct iv_cand
*cand
)
2320 struct cost_pair
*ret
;
2325 if (data
->consider_all_candidates
)
2327 ret
= use
->cost_map
+ cand
->id
;
2334 /* n_map_members is a power of two, so this computes modulo. */
2335 s
= cand
->id
& (use
->n_map_members
- 1);
2336 for (i
= s
; i
< use
->n_map_members
; i
++)
2337 if (use
->cost_map
[i
].cand
== cand
)
2338 return use
->cost_map
+ i
;
2340 for (i
= 0; i
< s
; i
++)
2341 if (use
->cost_map
[i
].cand
== cand
)
2342 return use
->cost_map
+ i
;
2347 /* Returns estimate on cost of computing SEQ. */
2355 for (; seq
; seq
= NEXT_INSN (seq
))
2357 set
= single_set (seq
);
2359 cost
+= rtx_cost (set
, SET
);
2367 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2369 produce_memory_decl_rtl (tree obj
, int *regno
)
2374 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2376 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2377 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2378 SET_SYMBOL_REF_DECL (x
, obj
);
2379 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2380 targetm
.encode_section_info (obj
, x
, true);
2384 x
= gen_raw_REG (Pmode
, (*regno
)++);
2385 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2391 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2392 walk_tree. DATA contains the actual fake register number. */
2395 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2397 tree obj
= NULL_TREE
;
2399 int *regno
= (int *) data
;
2401 switch (TREE_CODE (*expr_p
))
2404 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2405 handled_component_p (*expr_p
);
2406 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2409 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2410 x
= produce_memory_decl_rtl (obj
, regno
);
2415 obj
= SSA_NAME_VAR (*expr_p
);
2416 if (!DECL_RTL_SET_P (obj
))
2417 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2426 if (DECL_RTL_SET_P (obj
))
2429 if (DECL_MODE (obj
) == BLKmode
)
2430 x
= produce_memory_decl_rtl (obj
, regno
);
2432 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2442 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2443 SET_DECL_RTL (obj
, x
);
2449 /* Determines cost of the computation of EXPR. */
2452 computation_cost (tree expr
)
2455 tree type
= TREE_TYPE (expr
);
2457 /* Avoid using hard regs in ways which may be unsupported. */
2458 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2460 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2462 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2466 cost
= seq_cost (seq
);
2468 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2473 /* Returns variable containing the value of candidate CAND at statement AT. */
2476 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2478 if (stmt_after_increment (loop
, cand
, stmt
))
2479 return cand
->var_after
;
2481 return cand
->var_before
;
2484 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2485 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2488 tree_int_cst_sign_bit (tree t
)
2490 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2491 unsigned HOST_WIDE_INT w
;
2493 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2494 w
= TREE_INT_CST_LOW (t
);
2497 w
= TREE_INT_CST_HIGH (t
);
2498 bitno
-= HOST_BITS_PER_WIDE_INT
;
2501 return (w
>> bitno
) & 1;
2504 /* If we can prove that TOP = cst * BOT for some constant cst,
2505 store cst to MUL and return true. Otherwise return false.
2506 The returned value is always sign-extended, regardless of the
2507 signedness of TOP and BOT. */
2510 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
2513 enum tree_code code
;
2514 double_int res
, p0
, p1
;
2515 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2520 if (operand_equal_p (top
, bot
, 0))
2522 *mul
= double_int_one
;
2526 code
= TREE_CODE (top
);
2530 mby
= TREE_OPERAND (top
, 1);
2531 if (TREE_CODE (mby
) != INTEGER_CST
)
2534 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2537 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
2543 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2544 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2547 if (code
== MINUS_EXPR
)
2548 p1
= double_int_neg (p1
);
2549 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
2553 if (TREE_CODE (bot
) != INTEGER_CST
)
2556 p0
= double_int_sext (tree_to_double_int (top
), precision
);
2557 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
2558 if (double_int_zero_p (p1
))
2560 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
2562 return double_int_zero_p (res
);
2569 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2570 same precision that is at least as wide as the precision of TYPE, stores
2571 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2575 determine_common_wider_type (tree
*a
, tree
*b
)
2577 tree wider_type
= NULL
;
2579 tree atype
= TREE_TYPE (*a
);
2581 if ((TREE_CODE (*a
) == NOP_EXPR
2582 || TREE_CODE (*a
) == CONVERT_EXPR
))
2584 suba
= TREE_OPERAND (*a
, 0);
2585 wider_type
= TREE_TYPE (suba
);
2586 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2592 if ((TREE_CODE (*b
) == NOP_EXPR
2593 || TREE_CODE (*b
) == CONVERT_EXPR
))
2595 subb
= TREE_OPERAND (*b
, 0);
2596 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2607 /* Determines the expression by that USE is expressed from induction variable
2608 CAND at statement AT in LOOP. The expression is stored in a decomposed
2609 form into AFF. Returns false if USE cannot be expressed using CAND. */
2612 get_computation_aff (struct loop
*loop
,
2613 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2614 struct affine_tree_combination
*aff
)
2616 tree ubase
= use
->iv
->base
;
2617 tree ustep
= use
->iv
->step
;
2618 tree cbase
= cand
->iv
->base
;
2619 tree cstep
= cand
->iv
->step
, cstep_common
;
2620 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2621 tree common_type
, var
;
2623 aff_tree cbase_aff
, var_aff
;
2626 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2628 /* We do not have a precision to express the values of use. */
2632 var
= var_at_stmt (loop
, cand
, at
);
2633 uutype
= unsigned_type_for (utype
);
2635 /* If the conversion is not noop, perform it. */
2636 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2638 cstep
= fold_convert (uutype
, cstep
);
2639 cbase
= fold_convert (uutype
, cbase
);
2640 var
= fold_convert (uutype
, var
);
2643 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2646 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2647 type, we achieve better folding by computing their difference in this
2648 wider type, and cast the result to UUTYPE. We do not need to worry about
2649 overflows, as all the arithmetics will in the end be performed in UUTYPE
2651 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2653 /* use = ubase - ratio * cbase + ratio * var. */
2654 tree_to_aff_combination (ubase
, common_type
, aff
);
2655 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2656 tree_to_aff_combination (var
, uutype
, &var_aff
);
2658 /* We need to shift the value if we are after the increment. */
2659 if (stmt_after_increment (loop
, cand
, at
))
2663 if (common_type
!= uutype
)
2664 cstep_common
= fold_convert (common_type
, cstep
);
2666 cstep_common
= cstep
;
2668 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2669 aff_combination_add (&cbase_aff
, &cstep_aff
);
2672 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2673 aff_combination_add (aff
, &cbase_aff
);
2674 if (common_type
!= uutype
)
2675 aff_combination_convert (aff
, uutype
);
2677 aff_combination_scale (&var_aff
, rat
);
2678 aff_combination_add (aff
, &var_aff
);
2683 /* Determines the expression by that USE is expressed from induction variable
2684 CAND at statement AT in LOOP. The computation is unshared. */
2687 get_computation_at (struct loop
*loop
,
2688 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
2691 tree type
= TREE_TYPE (use
->iv
->base
);
2693 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
2695 unshare_aff_combination (&aff
);
2696 return fold_convert (type
, aff_combination_to_tree (&aff
));
2699 /* Determines the expression by that USE is expressed from induction variable
2700 CAND in LOOP. The computation is unshared. */
2703 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2705 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2708 /* Returns cost of addition in MODE. */
2711 add_cost (enum machine_mode mode
)
2713 static unsigned costs
[NUM_MACHINE_MODES
];
2721 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2722 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2723 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
2728 cost
= seq_cost (seq
);
2734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2735 fprintf (dump_file
, "Addition in %s costs %d\n",
2736 GET_MODE_NAME (mode
), cost
);
2740 /* Entry in a hashtable of already known costs for multiplication. */
2743 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2744 enum machine_mode mode
; /* In mode. */
2745 unsigned cost
; /* The cost. */
2748 /* Counts hash value for the ENTRY. */
2751 mbc_entry_hash (const void *entry
)
2753 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
2755 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2758 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2761 mbc_entry_eq (const void *entry1
, const void *entry2
)
2763 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
2764 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
2766 return (e1
->mode
== e2
->mode
2767 && e1
->cst
== e2
->cst
);
2770 /* Returns cost of multiplication by constant CST in MODE. */
2773 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
2775 static htab_t costs
;
2776 struct mbc_entry
**cached
, act
;
2781 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2785 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2787 return (*cached
)->cost
;
2789 *cached
= XNEW (struct mbc_entry
);
2790 (*cached
)->mode
= mode
;
2791 (*cached
)->cst
= cst
;
2794 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2795 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
2799 cost
= seq_cost (seq
);
2801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2802 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2803 (int) cst
, GET_MODE_NAME (mode
), cost
);
2805 (*cached
)->cost
= cost
;
2810 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2811 validity for a memory reference accessing memory of mode MODE. */
2814 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
)
2816 #define MAX_RATIO 128
2817 static sbitmap valid_mult
[MAX_MACHINE_MODE
];
2819 if (!valid_mult
[mode
])
2821 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
2825 valid_mult
[mode
] = sbitmap_alloc (2 * MAX_RATIO
+ 1);
2826 sbitmap_zero (valid_mult
[mode
]);
2827 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2828 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2830 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
2831 if (memory_address_p (mode
, addr
))
2832 SET_BIT (valid_mult
[mode
], i
+ MAX_RATIO
);
2835 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2837 fprintf (dump_file
, " allowed multipliers:");
2838 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2839 if (TEST_BIT (valid_mult
[mode
], i
+ MAX_RATIO
))
2840 fprintf (dump_file
, " %d", (int) i
);
2841 fprintf (dump_file
, "\n");
2842 fprintf (dump_file
, "\n");
2846 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
2849 return TEST_BIT (valid_mult
[mode
], ratio
+ MAX_RATIO
);
2852 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2853 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2854 variable is omitted. Compute the cost for a memory reference that accesses
2855 a memory location of mode MEM_MODE.
2857 TODO -- there must be some better way. This all is quite crude. */
2860 get_address_cost (bool symbol_present
, bool var_present
,
2861 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
2862 enum machine_mode mem_mode
)
2864 static bool initialized
[MAX_MACHINE_MODE
];
2865 static HOST_WIDE_INT rat
[MAX_MACHINE_MODE
], off
[MAX_MACHINE_MODE
];
2866 static HOST_WIDE_INT min_offset
[MAX_MACHINE_MODE
], max_offset
[MAX_MACHINE_MODE
];
2867 static unsigned costs
[MAX_MACHINE_MODE
][2][2][2][2];
2868 unsigned cost
, acost
;
2869 bool offset_p
, ratio_p
;
2870 HOST_WIDE_INT s_offset
;
2871 unsigned HOST_WIDE_INT mask
;
2874 if (!initialized
[mem_mode
])
2877 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
2878 int old_cse_not_expected
;
2879 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
2880 rtx seq
, addr
, base
;
2883 initialized
[mem_mode
] = true;
2885 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
2887 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
2888 for (i
= start
; i
<= 1 << 20; i
<<= 1)
2890 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
2891 if (!memory_address_p (mem_mode
, addr
))
2894 max_offset
[mem_mode
] = i
== start
? 0 : i
>> 1;
2895 off
[mem_mode
] = max_offset
[mem_mode
];
2897 for (i
= start
; i
<= 1 << 20; i
<<= 1)
2899 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
2900 if (!memory_address_p (mem_mode
, addr
))
2903 min_offset
[mem_mode
] = i
== start
? 0 : -(i
>> 1);
2905 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2907 fprintf (dump_file
, "get_address_cost:\n");
2908 fprintf (dump_file
, " min offset %s %d\n",
2909 GET_MODE_NAME (mem_mode
),
2910 (int) min_offset
[mem_mode
]);
2911 fprintf (dump_file
, " max offset %s %d\n",
2912 GET_MODE_NAME (mem_mode
),
2913 (int) max_offset
[mem_mode
]);
2917 for (i
= 2; i
<= MAX_RATIO
; i
++)
2918 if (multiplier_allowed_in_address_p (i
, mem_mode
))
2924 /* Compute the cost of various addressing modes. */
2926 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
2927 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
2929 for (i
= 0; i
< 16; i
++)
2932 var_p
= (i
>> 1) & 1;
2933 off_p
= (i
>> 2) & 1;
2934 rat_p
= (i
>> 3) & 1;
2938 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
,
2939 gen_int_mode (rat
[mem_mode
], Pmode
));
2942 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
2946 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
2947 /* ??? We can run into trouble with some backends by presenting
2948 it with symbols which havn't been properly passed through
2949 targetm.encode_section_info. By setting the local bit, we
2950 enhance the probability of things working. */
2951 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
2954 base
= gen_rtx_fmt_e (CONST
, Pmode
,
2955 gen_rtx_fmt_ee (PLUS
, Pmode
,
2957 gen_int_mode (off
[mem_mode
],
2961 base
= gen_int_mode (off
[mem_mode
], Pmode
);
2966 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
2969 /* To avoid splitting addressing modes, pretend that no cse will
2971 old_cse_not_expected
= cse_not_expected
;
2972 cse_not_expected
= true;
2973 addr
= memory_address (mem_mode
, addr
);
2974 cse_not_expected
= old_cse_not_expected
;
2978 acost
= seq_cost (seq
);
2979 acost
+= address_cost (addr
, mem_mode
);
2983 costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
] = acost
;
2986 /* On some targets, it is quite expensive to load symbol to a register,
2987 which makes addresses that contain symbols look much more expensive.
2988 However, the symbol will have to be loaded in any case before the
2989 loop (and quite likely we have it in register already), so it does not
2990 make much sense to penalize them too heavily. So make some final
2991 tweaks for the SYMBOL_PRESENT modes:
2993 If VAR_PRESENT is false, and the mode obtained by changing symbol to
2994 var is cheaper, use this mode with small penalty.
2995 If VAR_PRESENT is true, try whether the mode with
2996 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2997 if this is the case, use it. */
2998 add_c
= add_cost (Pmode
);
2999 for (i
= 0; i
< 8; i
++)
3002 off_p
= (i
>> 1) & 1;
3003 rat_p
= (i
>> 2) & 1;
3005 acost
= costs
[mem_mode
][0][1][off_p
][rat_p
] + 1;
3009 if (acost
< costs
[mem_mode
][1][var_p
][off_p
][rat_p
])
3010 costs
[mem_mode
][1][var_p
][off_p
][rat_p
] = acost
;
3013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3015 fprintf (dump_file
, "Address costs:\n");
3017 for (i
= 0; i
< 16; i
++)
3020 var_p
= (i
>> 1) & 1;
3021 off_p
= (i
>> 2) & 1;
3022 rat_p
= (i
>> 3) & 1;
3024 fprintf (dump_file
, " ");
3026 fprintf (dump_file
, "sym + ");
3028 fprintf (dump_file
, "var + ");
3030 fprintf (dump_file
, "cst + ");
3032 fprintf (dump_file
, "rat * ");
3034 acost
= costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
];
3035 fprintf (dump_file
, "index costs %d\n", acost
);
3037 fprintf (dump_file
, "\n");
3041 bits
= GET_MODE_BITSIZE (Pmode
);
3042 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3044 if ((offset
>> (bits
- 1) & 1))
3049 offset_p
= (s_offset
!= 0
3050 && min_offset
[mem_mode
] <= s_offset
3051 && s_offset
<= max_offset
[mem_mode
]);
3052 ratio_p
= (ratio
!= 1
3053 && multiplier_allowed_in_address_p (ratio
, mem_mode
));
3055 if (ratio
!= 1 && !ratio_p
)
3056 cost
+= multiply_by_cost (ratio
, Pmode
);
3058 if (s_offset
&& !offset_p
&& !symbol_present
)
3059 cost
+= add_cost (Pmode
);
3061 acost
= costs
[mem_mode
][symbol_present
][var_present
][offset_p
][ratio_p
];
3062 return cost
+ acost
;
3065 /* Estimates cost of forcing expression EXPR into a variable. */
3068 force_expr_to_var_cost (tree expr
)
3070 static bool costs_initialized
= false;
3071 static unsigned integer_cost
;
3072 static unsigned symbol_cost
;
3073 static unsigned address_cost
;
3075 unsigned cost0
, cost1
, cost
;
3076 enum machine_mode mode
;
3078 if (!costs_initialized
)
3080 tree type
= build_pointer_type (integer_type_node
);
3084 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3085 TREE_STATIC (var
) = 1;
3086 x
= produce_memory_decl_rtl (var
, NULL
);
3087 SET_DECL_RTL (var
, x
);
3089 integer_cost
= computation_cost (build_int_cst (integer_type_node
,
3092 addr
= build1 (ADDR_EXPR
, type
, var
);
3093 symbol_cost
= computation_cost (addr
) + 1;
3096 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3098 build_int_cst (sizetype
, 2000))) + 1;
3099 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3101 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3102 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3103 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3104 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3105 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3106 fprintf (dump_file
, "\n");
3109 costs_initialized
= true;
3114 if (SSA_VAR_P (expr
))
3117 if (TREE_INVARIANT (expr
))
3119 if (TREE_CODE (expr
) == INTEGER_CST
)
3120 return integer_cost
;
3122 if (TREE_CODE (expr
) == ADDR_EXPR
)
3124 tree obj
= TREE_OPERAND (expr
, 0);
3126 if (TREE_CODE (obj
) == VAR_DECL
3127 || TREE_CODE (obj
) == PARM_DECL
3128 || TREE_CODE (obj
) == RESULT_DECL
)
3132 return address_cost
;
3135 switch (TREE_CODE (expr
))
3137 case POINTER_PLUS_EXPR
:
3141 op0
= TREE_OPERAND (expr
, 0);
3142 op1
= TREE_OPERAND (expr
, 1);
3146 if (is_gimple_val (op0
))
3149 cost0
= force_expr_to_var_cost (op0
);
3151 if (is_gimple_val (op1
))
3154 cost1
= force_expr_to_var_cost (op1
);
3159 /* Just an arbitrary value, FIXME. */
3160 return target_spill_cost
;
3163 mode
= TYPE_MODE (TREE_TYPE (expr
));
3164 switch (TREE_CODE (expr
))
3166 case POINTER_PLUS_EXPR
:
3169 cost
= add_cost (mode
);
3173 if (cst_and_fits_in_hwi (op0
))
3174 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3175 else if (cst_and_fits_in_hwi (op1
))
3176 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3178 return target_spill_cost
;
3188 /* Bound the cost by target_spill_cost. The parts of complicated
3189 computations often are either loop invariant or at least can
3190 be shared between several iv uses, so letting this grow without
3191 limits would not give reasonable results. */
3192 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3195 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3196 invariants the computation depends on. */
3199 force_var_cost (struct ivopts_data
*data
,
3200 tree expr
, bitmap
*depends_on
)
3204 fd_ivopts_data
= data
;
3205 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3208 return force_expr_to_var_cost (expr
);
3211 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3212 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3213 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3214 invariants the computation depends on. */
3217 split_address_cost (struct ivopts_data
*data
,
3218 tree addr
, bool *symbol_present
, bool *var_present
,
3219 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3222 HOST_WIDE_INT bitsize
;
3223 HOST_WIDE_INT bitpos
;
3225 enum machine_mode mode
;
3226 int unsignedp
, volatilep
;
3228 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3229 &unsignedp
, &volatilep
, false);
3232 || bitpos
% BITS_PER_UNIT
!= 0
3233 || TREE_CODE (core
) != VAR_DECL
)
3235 *symbol_present
= false;
3236 *var_present
= true;
3237 fd_ivopts_data
= data
;
3238 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3239 return target_spill_cost
;
3242 *offset
+= bitpos
/ BITS_PER_UNIT
;
3243 if (TREE_STATIC (core
)
3244 || DECL_EXTERNAL (core
))
3246 *symbol_present
= true;
3247 *var_present
= false;
3251 *symbol_present
= false;
3252 *var_present
= true;
3256 /* Estimates cost of expressing difference of addresses E1 - E2 as
3257 var + symbol + offset. The value of offset is added to OFFSET,
3258 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3259 part is missing. DEPENDS_ON is a set of the invariants the computation
3263 ptr_difference_cost (struct ivopts_data
*data
,
3264 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3265 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3267 HOST_WIDE_INT diff
= 0;
3270 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3272 if (ptr_difference_const (e1
, e2
, &diff
))
3275 *symbol_present
= false;
3276 *var_present
= false;
3280 if (e2
== integer_zero_node
)
3281 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3282 symbol_present
, var_present
, offset
, depends_on
);
3284 *symbol_present
= false;
3285 *var_present
= true;
3287 cost
= force_var_cost (data
, e1
, depends_on
);
3288 cost
+= force_var_cost (data
, e2
, depends_on
);
3289 cost
+= add_cost (Pmode
);
3294 /* Estimates cost of expressing difference E1 - E2 as
3295 var + symbol + offset. The value of offset is added to OFFSET,
3296 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3297 part is missing. DEPENDS_ON is a set of the invariants the computation
3301 difference_cost (struct ivopts_data
*data
,
3302 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3303 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3306 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3307 unsigned HOST_WIDE_INT off1
, off2
;
3309 e1
= strip_offset (e1
, &off1
);
3310 e2
= strip_offset (e2
, &off2
);
3311 *offset
+= off1
- off2
;
3316 if (TREE_CODE (e1
) == ADDR_EXPR
)
3317 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3319 *symbol_present
= false;
3321 if (operand_equal_p (e1
, e2
, 0))
3323 *var_present
= false;
3326 *var_present
= true;
3327 if (integer_zerop (e2
))
3328 return force_var_cost (data
, e1
, depends_on
);
3330 if (integer_zerop (e1
))
3332 cost
= force_var_cost (data
, e2
, depends_on
);
3333 cost
+= multiply_by_cost (-1, mode
);
3338 cost
= force_var_cost (data
, e1
, depends_on
);
3339 cost
+= force_var_cost (data
, e2
, depends_on
);
3340 cost
+= add_cost (mode
);
3345 /* Determines the cost of the computation by that USE is expressed
3346 from induction variable CAND. If ADDRESS_P is true, we just need
3347 to create an address from it, otherwise we want to get it into
3348 register. A set of invariants we depend on is stored in
3349 DEPENDS_ON. AT is the statement at that the value is computed. */
3352 get_computation_cost_at (struct ivopts_data
*data
,
3353 struct iv_use
*use
, struct iv_cand
*cand
,
3354 bool address_p
, bitmap
*depends_on
, tree at
)
3356 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3358 tree utype
= TREE_TYPE (ubase
), ctype
;
3359 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3360 HOST_WIDE_INT ratio
, aratio
;
3361 bool var_present
, symbol_present
;
3362 unsigned cost
= 0, n_sums
;
3367 /* Only consider real candidates. */
3371 cbase
= cand
->iv
->base
;
3372 cstep
= cand
->iv
->step
;
3373 ctype
= TREE_TYPE (cbase
);
3375 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3377 /* We do not have a precision to express the values of use. */
3383 /* Do not try to express address of an object with computation based
3384 on address of a different object. This may cause problems in rtl
3385 level alias analysis (that does not expect this to be happening,
3386 as this is illegal in C), and would be unlikely to be useful
3388 if (use
->iv
->base_object
3389 && cand
->iv
->base_object
3390 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3394 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3396 /* TODO -- add direct handling of this case. */
3400 /* CSTEPI is removed from the offset in case statement is after the
3401 increment. If the step is not constant, we use zero instead.
3402 This is a bit imprecise (there is the extra addition), but
3403 redundancy elimination is likely to transform the code so that
3404 it uses value of the variable before increment anyway,
3405 so it is not that much unrealistic. */
3406 if (cst_and_fits_in_hwi (cstep
))
3407 cstepi
= int_cst_value (cstep
);
3411 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3414 if (double_int_fits_in_shwi_p (rat
))
3415 ratio
= double_int_to_shwi (rat
);
3419 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3420 or ratio == 1, it is better to handle this like
3422 ubase - ratio * cbase + ratio * var
3424 (also holds in the case ratio == -1, TODO. */
3426 if (cst_and_fits_in_hwi (cbase
))
3428 offset
= - ratio
* int_cst_value (cbase
);
3429 cost
+= difference_cost (data
,
3430 ubase
, integer_zero_node
,
3431 &symbol_present
, &var_present
, &offset
,
3434 else if (ratio
== 1)
3436 cost
+= difference_cost (data
,
3438 &symbol_present
, &var_present
, &offset
,
3443 cost
+= force_var_cost (data
, cbase
, depends_on
);
3444 cost
+= add_cost (TYPE_MODE (ctype
));
3445 cost
+= difference_cost (data
,
3446 ubase
, integer_zero_node
,
3447 &symbol_present
, &var_present
, &offset
,
3451 /* If we are after the increment, the value of the candidate is higher by
3453 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3454 offset
-= ratio
* cstepi
;
3456 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3457 (symbol/var/const parts may be omitted). If we are looking for an address,
3458 find the cost of addressing this. */
3460 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
,
3461 TYPE_MODE (TREE_TYPE (*use
->op_p
)));
3463 /* Otherwise estimate the costs for computing the expression. */
3464 aratio
= ratio
> 0 ? ratio
: -ratio
;
3465 if (!symbol_present
&& !var_present
&& !offset
)
3468 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3474 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3478 /* Symbol + offset should be compile-time computable. */
3479 && (symbol_present
|| offset
))
3482 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3486 /* Just get the expression, expand it and measure the cost. */
3487 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3493 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3495 return computation_cost (comp
);
3499 /* Determines the cost of the computation by that USE is expressed
3500 from induction variable CAND. If ADDRESS_P is true, we just need
3501 to create an address from it, otherwise we want to get it into
3502 register. A set of invariants we depend on is stored in
3506 get_computation_cost (struct ivopts_data
*data
,
3507 struct iv_use
*use
, struct iv_cand
*cand
,
3508 bool address_p
, bitmap
*depends_on
)
3510 return get_computation_cost_at (data
,
3511 use
, cand
, address_p
, depends_on
, use
->stmt
);
3514 /* Determines cost of basing replacement of USE on CAND in a generic
3518 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3519 struct iv_use
*use
, struct iv_cand
*cand
)
3524 /* The simple case first -- if we need to express value of the preserved
3525 original biv, the cost is 0. This also prevents us from counting the
3526 cost of increment twice -- once at this use and once in the cost of
3528 if (cand
->pos
== IP_ORIGINAL
3529 && cand
->incremented_at
== use
->stmt
)
3531 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3535 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3536 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3538 return cost
!= INFTY
;
3541 /* Determines cost of basing replacement of USE on CAND in an address. */
3544 determine_use_iv_cost_address (struct ivopts_data
*data
,
3545 struct iv_use
*use
, struct iv_cand
*cand
)
3548 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3550 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3552 return cost
!= INFTY
;
3555 /* Computes value of candidate CAND at position AT in iteration NITER, and
3556 stores it to VAL. */
3559 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
,
3562 aff_tree step
, delta
, nit
;
3563 struct iv
*iv
= cand
->iv
;
3564 tree type
= TREE_TYPE (iv
->base
);
3566 tree_to_aff_combination (iv
->step
, type
, &step
);
3567 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
3568 aff_combination_convert (&nit
, type
);
3569 aff_combination_mult (&nit
, &step
, &delta
);
3570 if (stmt_after_increment (loop
, cand
, at
))
3571 aff_combination_add (&delta
, &step
);
3573 tree_to_aff_combination (iv
->base
, type
, val
);
3574 aff_combination_add (val
, &delta
);
3577 /* Returns period of induction variable iv. */
3580 iv_period (struct iv
*iv
)
3582 tree step
= iv
->step
, period
, type
;
3585 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3587 /* Period of the iv is gcd (step, type range). Since type range is power
3588 of two, it suffices to determine the maximum power of two that divides
3590 pow2div
= num_ending_zeros (step
);
3591 type
= unsigned_type_for (TREE_TYPE (step
));
3593 period
= build_low_bits_mask (type
,
3594 (TYPE_PRECISION (type
)
3595 - tree_low_cst (pow2div
, 1)));
3600 /* Returns the comparison operator used when eliminating the iv USE. */
3602 static enum tree_code
3603 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3605 struct loop
*loop
= data
->current_loop
;
3609 ex_bb
= bb_for_stmt (use
->stmt
);
3610 exit
= EDGE_SUCC (ex_bb
, 0);
3611 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3612 exit
= EDGE_SUCC (ex_bb
, 1);
3614 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3617 /* Check whether it is possible to express the condition in USE by comparison
3618 of candidate CAND. If so, store the value compared with to BOUND. */
3621 may_eliminate_iv (struct ivopts_data
*data
,
3622 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3627 tree wider_type
, period
, per_type
;
3628 struct loop
*loop
= data
->current_loop
;
3631 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3634 /* For now works only for exits that dominate the loop latch. TODO -- extend
3635 for other conditions inside loop body. */
3636 ex_bb
= bb_for_stmt (use
->stmt
);
3637 if (use
->stmt
!= last_stmt (ex_bb
)
3638 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3640 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3643 exit
= EDGE_SUCC (ex_bb
, 0);
3644 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3645 exit
= EDGE_SUCC (ex_bb
, 1);
3646 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3649 nit
= niter_for_exit (data
, exit
);
3653 nit_type
= TREE_TYPE (nit
);
3655 /* Determine whether we may use the variable to test whether niter iterations
3656 elapsed. This is the case iff the period of the induction variable is
3657 greater than the number of iterations. */
3658 period
= iv_period (cand
->iv
);
3661 per_type
= TREE_TYPE (period
);
3663 wider_type
= TREE_TYPE (period
);
3664 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
3665 wider_type
= per_type
;
3667 wider_type
= nit_type
;
3669 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
3670 fold_convert (wider_type
, period
),
3671 fold_convert (wider_type
, nit
))))
3674 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
3675 *bound
= aff_combination_to_tree (&bnd
);
3679 /* Determines cost of basing replacement of USE on CAND in a condition. */
3682 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3683 struct iv_use
*use
, struct iv_cand
*cand
)
3685 tree bound
= NULL_TREE
;
3687 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
3688 unsigned elim_cost
, express_cost
, cost
;
3691 /* Only consider real candidates. */
3694 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
3698 /* Try iv elimination. */
3699 if (may_eliminate_iv (data
, use
, cand
, &bound
))
3700 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
3704 /* Try expressing the original giv. If it is compared with an invariant,
3705 note that we cannot get rid of it. */
3706 ok
= extract_cond_operands (data
, use
->op_p
, NULL
, NULL
, NULL
, &cmp_iv
);
3709 express_cost
= get_computation_cost (data
, use
, cand
, false,
3710 &depends_on_express
);
3711 fd_ivopts_data
= data
;
3712 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
3714 /* Choose the better approach. */
3715 if (elim_cost
< express_cost
)
3718 depends_on
= depends_on_elim
;
3719 depends_on_elim
= NULL
;
3723 cost
= express_cost
;
3724 depends_on
= depends_on_express
;
3725 depends_on_express
= NULL
;
3729 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
3731 if (depends_on_elim
)
3732 BITMAP_FREE (depends_on_elim
);
3733 if (depends_on_express
)
3734 BITMAP_FREE (depends_on_express
);
3736 return cost
!= INFTY
;
3739 /* Determines cost of basing replacement of USE on CAND. Returns false
3740 if USE cannot be based on CAND. */
3743 determine_use_iv_cost (struct ivopts_data
*data
,
3744 struct iv_use
*use
, struct iv_cand
*cand
)
3748 case USE_NONLINEAR_EXPR
:
3749 return determine_use_iv_cost_generic (data
, use
, cand
);
3752 return determine_use_iv_cost_address (data
, use
, cand
);
3755 return determine_use_iv_cost_condition (data
, use
, cand
);
3762 /* Determines costs of basing the use of the iv on an iv candidate. */
3765 determine_use_iv_costs (struct ivopts_data
*data
)
3769 struct iv_cand
*cand
;
3770 bitmap to_clear
= BITMAP_ALLOC (NULL
);
3772 alloc_use_cost_map (data
);
3774 for (i
= 0; i
< n_iv_uses (data
); i
++)
3776 use
= iv_use (data
, i
);
3778 if (data
->consider_all_candidates
)
3780 for (j
= 0; j
< n_iv_cands (data
); j
++)
3782 cand
= iv_cand (data
, j
);
3783 determine_use_iv_cost (data
, use
, cand
);
3790 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
3792 cand
= iv_cand (data
, j
);
3793 if (!determine_use_iv_cost (data
, use
, cand
))
3794 bitmap_set_bit (to_clear
, j
);
3797 /* Remove the candidates for that the cost is infinite from
3798 the list of related candidates. */
3799 bitmap_and_compl_into (use
->related_cands
, to_clear
);
3800 bitmap_clear (to_clear
);
3804 BITMAP_FREE (to_clear
);
3806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3808 fprintf (dump_file
, "Use-candidate costs:\n");
3810 for (i
= 0; i
< n_iv_uses (data
); i
++)
3812 use
= iv_use (data
, i
);
3814 fprintf (dump_file
, "Use %d:\n", i
);
3815 fprintf (dump_file
, " cand\tcost\tdepends on\n");
3816 for (j
= 0; j
< use
->n_map_members
; j
++)
3818 if (!use
->cost_map
[j
].cand
3819 || use
->cost_map
[j
].cost
== INFTY
)
3822 fprintf (dump_file
, " %d\t%d\t",
3823 use
->cost_map
[j
].cand
->id
,
3824 use
->cost_map
[j
].cost
);
3825 if (use
->cost_map
[j
].depends_on
)
3826 bitmap_print (dump_file
,
3827 use
->cost_map
[j
].depends_on
, "","");
3828 fprintf (dump_file
, "\n");
3831 fprintf (dump_file
, "\n");
3833 fprintf (dump_file
, "\n");
3837 /* Determines cost of the candidate CAND. */
3840 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
3842 unsigned cost_base
, cost_step
;
3851 /* There are two costs associated with the candidate -- its increment
3852 and its initialization. The second is almost negligible for any loop
3853 that rolls enough, so we take it just very little into account. */
3855 base
= cand
->iv
->base
;
3856 cost_base
= force_var_cost (data
, base
, NULL
);
3857 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
3859 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
3861 /* Prefer the original iv unless we may gain something by replacing it;
3862 this is not really relevant for artificial ivs created by other
3864 if (cand
->pos
== IP_ORIGINAL
3865 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
3868 /* Prefer not to insert statements into latch unless there are some
3869 already (so that we do not create unnecessary jumps). */
3870 if (cand
->pos
== IP_END
3871 && empty_block_p (ip_end_pos (data
->current_loop
)))
3875 /* Determines costs of computation of the candidates. */
3878 determine_iv_costs (struct ivopts_data
*data
)
3882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3884 fprintf (dump_file
, "Candidate costs:\n");
3885 fprintf (dump_file
, " cand\tcost\n");
3888 for (i
= 0; i
< n_iv_cands (data
); i
++)
3890 struct iv_cand
*cand
= iv_cand (data
, i
);
3892 determine_iv_cost (data
, cand
);
3894 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3895 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
3898 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3899 fprintf (dump_file
, "\n");
3902 /* Calculates cost for having SIZE induction variables. */
3905 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
3907 /* We add size to the cost, so that we prefer eliminating ivs
3909 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
);
3912 /* For each size of the induction variable set determine the penalty. */
3915 determine_set_costs (struct ivopts_data
*data
)
3919 struct loop
*loop
= data
->current_loop
;
3922 /* We use the following model (definitely improvable, especially the
3923 cost function -- TODO):
3925 We estimate the number of registers available (using MD data), name it A.
3927 We estimate the number of registers used by the loop, name it U. This
3928 number is obtained as the number of loop phi nodes (not counting virtual
3929 registers and bivs) + the number of variables from outside of the loop.
3931 We set a reserve R (free regs that are used for temporary computations,
3932 etc.). For now the reserve is a constant 3.
3934 Let I be the number of induction variables.
3936 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3937 make a lot of ivs without a reason).
3938 -- if A - R < U + I <= A, the cost is I * PRES_COST
3939 -- if U + I > A, the cost is I * PRES_COST and
3940 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3942 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3944 fprintf (dump_file
, "Global costs:\n");
3945 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
3946 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
);
3947 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
3951 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
3953 op
= PHI_RESULT (phi
);
3955 if (!is_gimple_reg (op
))
3958 if (get_iv (data
, op
))
3964 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
3966 struct version_info
*info
= ver_info (data
, j
);
3968 if (info
->inv_id
&& info
->has_nonlin_use
)
3972 data
->regs_used
= n
;
3973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3974 fprintf (dump_file
, " regs_used %d\n", n
);
3976 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3978 fprintf (dump_file
, " cost for size:\n");
3979 fprintf (dump_file
, " ivs\tcost\n");
3980 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
3981 fprintf (dump_file
, " %d\t%d\n", j
,
3982 ivopts_global_cost_for_size (data
, j
));
3983 fprintf (dump_file
, "\n");
3987 /* Returns true if A is a cheaper cost pair than B. */
3990 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
3998 if (a
->cost
< b
->cost
)
4001 if (a
->cost
> b
->cost
)
4004 /* In case the costs are the same, prefer the cheaper candidate. */
4005 if (a
->cand
->cost
< b
->cand
->cost
)
4011 /* Computes the cost field of IVS structure. */
4014 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4018 cost
+= ivs
->cand_use_cost
;
4019 cost
+= ivs
->cand_cost
;
4020 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4025 /* Remove invariants in set INVS to set IVS. */
4028 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4036 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4038 ivs
->n_invariant_uses
[iid
]--;
4039 if (ivs
->n_invariant_uses
[iid
] == 0)
4044 /* Set USE not to be expressed by any candidate in IVS. */
4047 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4050 unsigned uid
= use
->id
, cid
;
4051 struct cost_pair
*cp
;
4053 cp
= ivs
->cand_for_use
[uid
];
4059 ivs
->cand_for_use
[uid
] = NULL
;
4060 ivs
->n_cand_uses
[cid
]--;
4062 if (ivs
->n_cand_uses
[cid
] == 0)
4064 bitmap_clear_bit (ivs
->cands
, cid
);
4065 /* Do not count the pseudocandidates. */
4069 ivs
->cand_cost
-= cp
->cand
->cost
;
4071 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4074 ivs
->cand_use_cost
-= cp
->cost
;
4076 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4077 iv_ca_recount_cost (data
, ivs
);
4080 /* Add invariants in set INVS to set IVS. */
4083 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4091 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4093 ivs
->n_invariant_uses
[iid
]++;
4094 if (ivs
->n_invariant_uses
[iid
] == 1)
4099 /* Set cost pair for USE in set IVS to CP. */
4102 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4103 struct iv_use
*use
, struct cost_pair
*cp
)
4105 unsigned uid
= use
->id
, cid
;
4107 if (ivs
->cand_for_use
[uid
] == cp
)
4110 if (ivs
->cand_for_use
[uid
])
4111 iv_ca_set_no_cp (data
, ivs
, use
);
4118 ivs
->cand_for_use
[uid
] = cp
;
4119 ivs
->n_cand_uses
[cid
]++;
4120 if (ivs
->n_cand_uses
[cid
] == 1)
4122 bitmap_set_bit (ivs
->cands
, cid
);
4123 /* Do not count the pseudocandidates. */
4127 ivs
->cand_cost
+= cp
->cand
->cost
;
4129 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4132 ivs
->cand_use_cost
+= cp
->cost
;
4133 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4134 iv_ca_recount_cost (data
, ivs
);
4138 /* Extend set IVS by expressing USE by some of the candidates in it
4142 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4145 struct cost_pair
*best_cp
= NULL
, *cp
;
4149 gcc_assert (ivs
->upto
>= use
->id
);
4151 if (ivs
->upto
== use
->id
)
4157 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4159 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4161 if (cheaper_cost_pair (cp
, best_cp
))
4165 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4168 /* Get cost for assignment IVS. */
4171 iv_ca_cost (struct iv_ca
*ivs
)
4173 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4176 /* Returns true if all dependences of CP are among invariants in IVS. */
4179 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4184 if (!cp
->depends_on
)
4187 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4189 if (ivs
->n_invariant_uses
[i
] == 0)
4196 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4197 it before NEXT_CHANGE. */
4199 static struct iv_ca_delta
*
4200 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4201 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4203 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4206 change
->old_cp
= old_cp
;
4207 change
->new_cp
= new_cp
;
4208 change
->next_change
= next_change
;
4213 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4216 static struct iv_ca_delta
*
4217 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4219 struct iv_ca_delta
*last
;
4227 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4229 last
->next_change
= l2
;
4234 /* Returns candidate by that USE is expressed in IVS. */
4236 static struct cost_pair
*
4237 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4239 return ivs
->cand_for_use
[use
->id
];
4242 /* Reverse the list of changes DELTA, forming the inverse to it. */
4244 static struct iv_ca_delta
*
4245 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4247 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4248 struct cost_pair
*tmp
;
4250 for (act
= delta
; act
; act
= next
)
4252 next
= act
->next_change
;
4253 act
->next_change
= prev
;
4257 act
->old_cp
= act
->new_cp
;
4264 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4265 reverted instead. */
4268 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4269 struct iv_ca_delta
*delta
, bool forward
)
4271 struct cost_pair
*from
, *to
;
4272 struct iv_ca_delta
*act
;
4275 delta
= iv_ca_delta_reverse (delta
);
4277 for (act
= delta
; act
; act
= act
->next_change
)
4281 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4282 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4286 iv_ca_delta_reverse (delta
);
4289 /* Returns true if CAND is used in IVS. */
4292 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4294 return ivs
->n_cand_uses
[cand
->id
] > 0;
4297 /* Returns number of induction variable candidates in the set IVS. */
4300 iv_ca_n_cands (struct iv_ca
*ivs
)
4302 return ivs
->n_cands
;
4305 /* Free the list of changes DELTA. */
4308 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4310 struct iv_ca_delta
*act
, *next
;
4312 for (act
= *delta
; act
; act
= next
)
4314 next
= act
->next_change
;
4321 /* Allocates new iv candidates assignment. */
4323 static struct iv_ca
*
4324 iv_ca_new (struct ivopts_data
*data
)
4326 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4330 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4331 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4332 nw
->cands
= BITMAP_ALLOC (NULL
);
4335 nw
->cand_use_cost
= 0;
4337 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4343 /* Free memory occupied by the set IVS. */
4346 iv_ca_free (struct iv_ca
**ivs
)
4348 free ((*ivs
)->cand_for_use
);
4349 free ((*ivs
)->n_cand_uses
);
4350 BITMAP_FREE ((*ivs
)->cands
);
4351 free ((*ivs
)->n_invariant_uses
);
4356 /* Dumps IVS to FILE. */
4359 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4361 const char *pref
= " invariants ";
4364 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4365 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4367 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4368 if (ivs
->n_invariant_uses
[i
])
4370 fprintf (file
, "%s%d", pref
, i
);
4373 fprintf (file
, "\n");
4376 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4377 new set, and store differences in DELTA. Number of induction variables
4378 in the new set is stored to N_IVS. */
4381 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4382 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4387 struct cost_pair
*old_cp
, *new_cp
;
4390 for (i
= 0; i
< ivs
->upto
; i
++)
4392 use
= iv_use (data
, i
);
4393 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4396 && old_cp
->cand
== cand
)
4399 new_cp
= get_use_iv_cost (data
, use
, cand
);
4403 if (!iv_ca_has_deps (ivs
, new_cp
))
4406 if (!cheaper_cost_pair (new_cp
, old_cp
))
4409 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4412 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4413 cost
= iv_ca_cost (ivs
);
4415 *n_ivs
= iv_ca_n_cands (ivs
);
4416 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4421 /* Try narrowing set IVS by removing CAND. Return the cost of
4422 the new set and store the differences in DELTA. */
4425 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4426 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4430 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4432 struct iv_cand
*cnd
;
4436 for (i
= 0; i
< n_iv_uses (data
); i
++)
4438 use
= iv_use (data
, i
);
4440 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4441 if (old_cp
->cand
!= cand
)
4446 if (data
->consider_all_candidates
)
4448 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4453 cnd
= iv_cand (data
, ci
);
4455 cp
= get_use_iv_cost (data
, use
, cnd
);
4458 if (!iv_ca_has_deps (ivs
, cp
))
4461 if (!cheaper_cost_pair (cp
, new_cp
))
4469 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4474 cnd
= iv_cand (data
, ci
);
4476 cp
= get_use_iv_cost (data
, use
, cnd
);
4479 if (!iv_ca_has_deps (ivs
, cp
))
4482 if (!cheaper_cost_pair (cp
, new_cp
))
4491 iv_ca_delta_free (delta
);
4495 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4498 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4499 cost
= iv_ca_cost (ivs
);
4500 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4505 /* Try optimizing the set of candidates IVS by removing candidates different
4506 from to EXCEPT_CAND from it. Return cost of the new set, and store
4507 differences in DELTA. */
4510 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4511 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4514 struct iv_ca_delta
*act_delta
, *best_delta
;
4515 unsigned i
, best_cost
, acost
;
4516 struct iv_cand
*cand
;
4519 best_cost
= iv_ca_cost (ivs
);
4521 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4523 cand
= iv_cand (data
, i
);
4525 if (cand
== except_cand
)
4528 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4530 if (acost
< best_cost
)
4533 iv_ca_delta_free (&best_delta
);
4534 best_delta
= act_delta
;
4537 iv_ca_delta_free (&act_delta
);
4546 /* Recurse to possibly remove other unnecessary ivs. */
4547 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4548 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4549 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4550 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4554 /* Tries to extend the sets IVS in the best possible way in order
4555 to express the USE. */
4558 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4561 unsigned best_cost
, act_cost
;
4564 struct iv_cand
*cand
;
4565 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4566 struct cost_pair
*cp
;
4568 iv_ca_add_use (data
, ivs
, use
);
4569 best_cost
= iv_ca_cost (ivs
);
4571 cp
= iv_ca_cand_for_use (ivs
, use
);
4574 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4575 iv_ca_set_no_cp (data
, ivs
, use
);
4578 /* First try important candidates. Only if it fails, try the specific ones.
4579 Rationale -- in loops with many variables the best choice often is to use
4580 just one generic biv. If we added here many ivs specific to the uses,
4581 the optimization algorithm later would be likely to get stuck in a local
4582 minimum, thus causing us to create too many ivs. The approach from
4583 few ivs to more seems more likely to be successful -- starting from few
4584 ivs, replacing an expensive use by a specific iv should always be a
4586 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4588 cand
= iv_cand (data
, i
);
4590 if (iv_ca_cand_used_p (ivs
, cand
))
4593 cp
= get_use_iv_cost (data
, use
, cand
);
4597 iv_ca_set_cp (data
, ivs
, use
, cp
);
4598 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4599 iv_ca_set_no_cp (data
, ivs
, use
);
4600 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4602 if (act_cost
< best_cost
)
4604 best_cost
= act_cost
;
4606 iv_ca_delta_free (&best_delta
);
4607 best_delta
= act_delta
;
4610 iv_ca_delta_free (&act_delta
);
4613 if (best_cost
== INFTY
)
4615 for (i
= 0; i
< use
->n_map_members
; i
++)
4617 cp
= use
->cost_map
+ i
;
4622 /* Already tried this. */
4623 if (cand
->important
)
4626 if (iv_ca_cand_used_p (ivs
, cand
))
4630 iv_ca_set_cp (data
, ivs
, use
, cp
);
4631 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4632 iv_ca_set_no_cp (data
, ivs
, use
);
4633 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4636 if (act_cost
< best_cost
)
4638 best_cost
= act_cost
;
4641 iv_ca_delta_free (&best_delta
);
4642 best_delta
= act_delta
;
4645 iv_ca_delta_free (&act_delta
);
4649 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4650 iv_ca_delta_free (&best_delta
);
4652 return (best_cost
!= INFTY
);
4655 /* Finds an initial assignment of candidates to uses. */
4657 static struct iv_ca
*
4658 get_initial_solution (struct ivopts_data
*data
)
4660 struct iv_ca
*ivs
= iv_ca_new (data
);
4663 for (i
= 0; i
< n_iv_uses (data
); i
++)
4664 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4673 /* Tries to improve set of induction variables IVS. */
4676 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4678 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4679 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4680 struct iv_cand
*cand
;
4682 /* Try extending the set of induction variables by one. */
4683 for (i
= 0; i
< n_iv_cands (data
); i
++)
4685 cand
= iv_cand (data
, i
);
4687 if (iv_ca_cand_used_p (ivs
, cand
))
4690 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4694 /* If we successfully added the candidate and the set is small enough,
4695 try optimizing it by removing other candidates. */
4696 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
4698 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
4699 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
4700 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
4701 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
4704 if (acost
< best_cost
)
4707 iv_ca_delta_free (&best_delta
);
4708 best_delta
= act_delta
;
4711 iv_ca_delta_free (&act_delta
);
4716 /* Try removing the candidates from the set instead. */
4717 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
4719 /* Nothing more we can do. */
4724 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4725 gcc_assert (best_cost
== iv_ca_cost (ivs
));
4726 iv_ca_delta_free (&best_delta
);
4730 /* Attempts to find the optimal set of induction variables. We do simple
4731 greedy heuristic -- we try to replace at most one candidate in the selected
4732 solution and remove the unused ivs while this improves the cost. */
4734 static struct iv_ca
*
4735 find_optimal_iv_set (struct ivopts_data
*data
)
4741 /* Get the initial solution. */
4742 set
= get_initial_solution (data
);
4745 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4746 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
4750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4752 fprintf (dump_file
, "Initial set of candidates:\n");
4753 iv_ca_dump (data
, dump_file
, set
);
4756 while (try_improve_iv_set (data
, set
))
4758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4760 fprintf (dump_file
, "Improved to:\n");
4761 iv_ca_dump (data
, dump_file
, set
);
4765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4766 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
4768 for (i
= 0; i
< n_iv_uses (data
); i
++)
4770 use
= iv_use (data
, i
);
4771 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
4777 /* Creates a new induction variable corresponding to CAND. */
4780 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
4782 block_stmt_iterator incr_pos
;
4792 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
4796 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
4801 /* Mark that the iv is preserved. */
4802 name_info (data
, cand
->var_before
)->preserve_biv
= true;
4803 name_info (data
, cand
->var_after
)->preserve_biv
= true;
4805 /* Rewrite the increment so that it uses var_before directly. */
4806 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
4811 gimple_add_tmp_var (cand
->var_before
);
4812 add_referenced_var (cand
->var_before
);
4814 base
= unshare_expr (cand
->iv
->base
);
4816 create_iv (base
, unshare_expr (cand
->iv
->step
),
4817 cand
->var_before
, data
->current_loop
,
4818 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
4821 /* Creates new induction variables described in SET. */
4824 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
4827 struct iv_cand
*cand
;
4830 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
4832 cand
= iv_cand (data
, i
);
4833 create_new_iv (data
, cand
);
4837 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4838 is true, remove also the ssa name defined by the statement. */
4841 remove_statement (tree stmt
, bool including_defined_name
)
4843 if (TREE_CODE (stmt
) == PHI_NODE
)
4845 remove_phi_node (stmt
, NULL_TREE
, including_defined_name
);
4849 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
4851 bsi_remove (&bsi
, true);
4852 release_defs (stmt
);
4856 /* Rewrites USE (definition of iv used in a nonlinear expression)
4857 using candidate CAND. */
4860 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
4861 struct iv_use
*use
, struct iv_cand
*cand
)
4864 tree op
, stmts
, tgt
, ass
;
4865 block_stmt_iterator bsi
;
4867 /* An important special case -- if we are asked to express value of
4868 the original iv by itself, just exit; there is no need to
4869 introduce a new computation (that might also need casting the
4870 variable to unsigned and back). */
4871 if (cand
->pos
== IP_ORIGINAL
4872 && cand
->incremented_at
== use
->stmt
)
4874 tree step
, ctype
, utype
;
4875 enum tree_code incr_code
= PLUS_EXPR
;
4877 gcc_assert (TREE_CODE (use
->stmt
) == GIMPLE_MODIFY_STMT
);
4878 gcc_assert (GIMPLE_STMT_OPERAND (use
->stmt
, 0) == cand
->var_after
);
4880 step
= cand
->iv
->step
;
4881 ctype
= TREE_TYPE (step
);
4882 utype
= TREE_TYPE (cand
->var_after
);
4883 if (TREE_CODE (step
) == NEGATE_EXPR
)
4885 incr_code
= MINUS_EXPR
;
4886 step
= TREE_OPERAND (step
, 0);
4889 /* Check whether we may leave the computation unchanged.
4890 This is the case only if it does not rely on other
4891 computations in the loop -- otherwise, the computation
4892 we rely upon may be removed in remove_unused_ivs,
4893 thus leading to ICE. */
4894 op
= GIMPLE_STMT_OPERAND (use
->stmt
, 1);
4895 if (TREE_CODE (op
) == PLUS_EXPR
4896 || TREE_CODE (op
) == MINUS_EXPR
)
4898 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
4899 op
= TREE_OPERAND (op
, 1);
4900 else if (TREE_CODE (op
) == PLUS_EXPR
4901 && TREE_OPERAND (op
, 1) == cand
->var_before
)
4902 op
= TREE_OPERAND (op
, 0);
4910 && (TREE_CODE (op
) == INTEGER_CST
4911 || operand_equal_p (op
, step
, 0)))
4914 /* Otherwise, add the necessary computations to express
4916 op
= fold_convert (ctype
, cand
->var_before
);
4917 comp
= fold_convert (utype
,
4918 build2 (incr_code
, ctype
, op
,
4919 unshare_expr (step
)));
4923 comp
= get_computation (data
->current_loop
, use
, cand
);
4924 gcc_assert (comp
!= NULL_TREE
);
4927 switch (TREE_CODE (use
->stmt
))
4930 tgt
= PHI_RESULT (use
->stmt
);
4932 /* If we should keep the biv, do not replace it. */
4933 if (name_info (data
, tgt
)->preserve_biv
)
4936 bsi
= bsi_after_labels (bb_for_stmt (use
->stmt
));
4939 case GIMPLE_MODIFY_STMT
:
4940 tgt
= GIMPLE_STMT_OPERAND (use
->stmt
, 0);
4941 bsi
= bsi_for_stmt (use
->stmt
);
4948 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
4950 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
4952 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
4954 ass
= build_gimple_modify_stmt (tgt
, op
);
4955 bsi_insert_before (&bsi
, ass
, BSI_SAME_STMT
);
4956 remove_statement (use
->stmt
, false);
4957 SSA_NAME_DEF_STMT (tgt
) = ass
;
4960 GIMPLE_STMT_OPERAND (use
->stmt
, 1) = op
;
4963 /* Replaces ssa name in index IDX by its basic variable. Callback for
4967 idx_remove_ssa_names (tree base
, tree
*idx
,
4968 void *data ATTRIBUTE_UNUSED
)
4972 if (TREE_CODE (*idx
) == SSA_NAME
)
4973 *idx
= SSA_NAME_VAR (*idx
);
4975 if (TREE_CODE (base
) == ARRAY_REF
)
4977 op
= &TREE_OPERAND (base
, 2);
4979 && TREE_CODE (*op
) == SSA_NAME
)
4980 *op
= SSA_NAME_VAR (*op
);
4981 op
= &TREE_OPERAND (base
, 3);
4983 && TREE_CODE (*op
) == SSA_NAME
)
4984 *op
= SSA_NAME_VAR (*op
);
4990 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4993 unshare_and_remove_ssa_names (tree ref
)
4995 ref
= unshare_expr (ref
);
4996 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5001 /* Extract the alias analysis info for the memory reference REF. There are
5002 several ways how this information may be stored and what precisely is
5003 its semantics depending on the type of the reference, but there always is
5004 somewhere hidden one _DECL node that is used to determine the set of
5005 virtual operands for the reference. The code below deciphers this jungle
5006 and extracts this single useful piece of information. */
5009 get_ref_tag (tree ref
, tree orig
)
5011 tree var
= get_base_address (ref
);
5012 tree aref
= NULL_TREE
, tag
, sv
;
5013 HOST_WIDE_INT offset
, size
, maxsize
;
5015 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5017 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5022 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5023 return unshare_expr (sv
);
5028 if (TREE_CODE (var
) == INDIRECT_REF
)
5030 /* If the base is a dereference of a pointer, first check its name memory
5031 tag. If it does not have one, use its symbol memory tag. */
5032 var
= TREE_OPERAND (var
, 0);
5033 if (TREE_CODE (var
) != SSA_NAME
)
5036 if (SSA_NAME_PTR_INFO (var
))
5038 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5043 var
= SSA_NAME_VAR (var
);
5044 tag
= symbol_mem_tag (var
);
5045 gcc_assert (tag
!= NULL_TREE
);
5053 tag
= symbol_mem_tag (var
);
5061 /* Copies the reference information from OLD_REF to NEW_REF. */
5064 copy_ref_info (tree new_ref
, tree old_ref
)
5066 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5067 copy_mem_ref_info (new_ref
, old_ref
);
5070 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5071 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5075 /* Rewrites USE (address that is an iv) using candidate CAND. */
5078 rewrite_use_address (struct ivopts_data
*data
,
5079 struct iv_use
*use
, struct iv_cand
*cand
)
5082 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5086 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5088 unshare_aff_combination (&aff
);
5090 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5091 copy_ref_info (ref
, *use
->op_p
);
5095 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5099 rewrite_use_compare (struct ivopts_data
*data
,
5100 struct iv_use
*use
, struct iv_cand
*cand
)
5102 tree comp
, *var_p
, op
, bound
;
5103 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5104 enum tree_code compare
;
5105 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5111 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5112 tree var_type
= TREE_TYPE (var
);
5114 compare
= iv_elimination_compare (data
, use
);
5115 bound
= unshare_expr (fold_convert (var_type
, bound
));
5116 op
= force_gimple_operand_bsi (&bsi
, bound
, true, NULL_TREE
);
5118 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5122 /* The induction variable elimination failed; just express the original
5124 comp
= get_computation (data
->current_loop
, use
, cand
);
5125 gcc_assert (comp
!= NULL_TREE
);
5127 ok
= extract_cond_operands (data
, use
->op_p
, &var_p
, NULL
, NULL
, NULL
);
5130 *var_p
= force_gimple_operand_bsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
));
5133 /* Rewrites USE using candidate CAND. */
5136 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
5138 push_stmt_changes (&use
->stmt
);
5142 case USE_NONLINEAR_EXPR
:
5143 rewrite_use_nonlinear_expr (data
, use
, cand
);
5147 rewrite_use_address (data
, use
, cand
);
5151 rewrite_use_compare (data
, use
, cand
);
5158 pop_stmt_changes (&use
->stmt
);
5161 /* Rewrite the uses using the selected induction variables. */
5164 rewrite_uses (struct ivopts_data
*data
)
5167 struct iv_cand
*cand
;
5170 for (i
= 0; i
< n_iv_uses (data
); i
++)
5172 use
= iv_use (data
, i
);
5173 cand
= use
->selected
;
5176 rewrite_use (data
, use
, cand
);
5180 /* Removes the ivs that are not used after rewriting. */
5183 remove_unused_ivs (struct ivopts_data
*data
)
5188 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5190 struct version_info
*info
;
5192 info
= ver_info (data
, j
);
5194 && !integer_zerop (info
->iv
->step
)
5196 && !info
->iv
->have_use_for
5197 && !info
->preserve_biv
)
5198 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5202 /* Frees data allocated by the optimization of a single loop. */
5205 free_loop_data (struct ivopts_data
*data
)
5213 pointer_map_destroy (data
->niters
);
5214 data
->niters
= NULL
;
5217 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5219 struct version_info
*info
;
5221 info
= ver_info (data
, i
);
5225 info
->has_nonlin_use
= false;
5226 info
->preserve_biv
= false;
5229 bitmap_clear (data
->relevant
);
5230 bitmap_clear (data
->important_candidates
);
5232 for (i
= 0; i
< n_iv_uses (data
); i
++)
5234 struct iv_use
*use
= iv_use (data
, i
);
5237 BITMAP_FREE (use
->related_cands
);
5238 for (j
= 0; j
< use
->n_map_members
; j
++)
5239 if (use
->cost_map
[j
].depends_on
)
5240 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5241 free (use
->cost_map
);
5244 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5246 for (i
= 0; i
< n_iv_cands (data
); i
++)
5248 struct iv_cand
*cand
= iv_cand (data
, i
);
5252 if (cand
->depends_on
)
5253 BITMAP_FREE (cand
->depends_on
);
5256 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5258 if (data
->version_info_size
< num_ssa_names
)
5260 data
->version_info_size
= 2 * num_ssa_names
;
5261 free (data
->version_info
);
5262 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5265 data
->max_inv_id
= 0;
5267 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5268 SET_DECL_RTL (obj
, NULL_RTX
);
5270 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5273 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5277 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5279 free_loop_data (data
);
5280 free (data
->version_info
);
5281 BITMAP_FREE (data
->relevant
);
5282 BITMAP_FREE (data
->important_candidates
);
5284 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5285 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5286 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5289 /* Optimizes the LOOP. Returns true if anything changed. */
5292 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5294 bool changed
= false;
5295 struct iv_ca
*iv_ca
;
5298 gcc_assert (!data
->niters
);
5299 data
->current_loop
= loop
;
5301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5303 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5305 exit
= single_dom_exit (loop
);
5308 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5309 exit
->src
->index
, exit
->dest
->index
);
5310 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5311 fprintf (dump_file
, "\n");
5314 fprintf (dump_file
, "\n");
5317 /* For each ssa name determines whether it behaves as an induction variable
5319 if (!find_induction_variables (data
))
5322 /* Finds interesting uses (item 1). */
5323 find_interesting_uses (data
);
5324 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5327 /* Finds candidates for the induction variables (item 2). */
5328 find_iv_candidates (data
);
5330 /* Calculates the costs (item 3, part 1). */
5331 determine_use_iv_costs (data
);
5332 determine_iv_costs (data
);
5333 determine_set_costs (data
);
5335 /* Find the optimal set of induction variables (item 3, part 2). */
5336 iv_ca
= find_optimal_iv_set (data
);
5341 /* Create the new induction variables (item 4, part 1). */
5342 create_new_ivs (data
, iv_ca
);
5343 iv_ca_free (&iv_ca
);
5345 /* Rewrite the uses (item 4, part 2). */
5346 rewrite_uses (data
);
5348 /* Remove the ivs that are unused after rewriting. */
5349 remove_unused_ivs (data
);
5351 /* We have changed the structure of induction variables; it might happen
5352 that definitions in the scev database refer to some of them that were
5357 free_loop_data (data
);
5362 /* Main entry point. Optimizes induction variables in loops. */
5365 tree_ssa_iv_optimize (void)
5368 struct ivopts_data data
;
5371 tree_ssa_iv_optimize_init (&data
);
5373 /* Optimize the loops starting with the innermost ones. */
5374 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
5376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5377 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5379 tree_ssa_iv_optimize_loop (&data
, loop
);
5382 tree_ssa_iv_optimize_finalize (&data
);