1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base
; /* Initial value of the iv. */
108 tree base_object
; /* A memory object to that the induction variable points. */
109 tree step
; /* Step of the iv (constant only). */
110 tree ssa_name
; /* The ssa name with the value. */
111 bool biv_p
; /* Is it a biv? */
112 bool have_use_for
; /* Do we already have a use for it? */
113 unsigned use_id
; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name
; /* The ssa name. */
120 struct iv
*iv
; /* Induction variable description. */
121 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id
; /* Id of an invariant. */
124 bool preserve_biv
; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* Cost of a computation. */
138 unsigned cost
; /* The runtime cost. */
139 unsigned complexity
; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost
= {0, 0};
146 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
148 /* The candidate - cost pair. */
151 struct iv_cand
*cand
; /* The candidate. */
152 comp_cost cost
; /* The cost. */
153 bitmap depends_on
; /* The list of invariants that have to be
155 tree value
; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id
; /* The id of the use. */
164 enum use_type type
; /* Type of the use. */
165 struct iv
*iv
; /* The induction variable it is based on. */
166 gimple stmt
; /* Statement in that it occurs. */
167 tree
*op_p
; /* The place where it occurs. */
168 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
172 struct cost_pair
*cost_map
;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand
*selected
;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
187 /* The induction variable candidate. */
190 unsigned id
; /* The number of the candidate. */
191 bool important
; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos
; /* Where it is computed. */
194 gimple incremented_at
;/* For original biv, the statement where it is
196 tree var_before
; /* The variable used for it before increment. */
197 tree var_after
; /* The variable used for it after increment. */
198 struct iv
*iv
; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost
; /* Cost of the candidate. */
203 bitmap depends_on
; /* The list of invariants that are used in step of the
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use
*iv_use_p
;
211 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
213 typedef struct iv_cand
*iv_cand_p
;
214 DEF_VEC_P(iv_cand_p
);
215 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
219 /* The currently optimized loop. */
220 struct loop
*current_loop
;
222 /* Numbers of iterations for all exits of the current loop. */
223 struct pointer_map_t
*niters
;
225 /* Number of registers used in it. */
228 /* The size of version_info array allocated. */
229 unsigned version_info_size
;
231 /* The array of information for the ssa names. */
232 struct version_info
*version_info
;
234 /* The bitmap of indices in version_info whose value was changed. */
237 /* The uses of induction variables. */
238 VEC(iv_use_p
,heap
) *iv_uses
;
240 /* The candidates. */
241 VEC(iv_cand_p
,heap
) *iv_candidates
;
243 /* A bitmap of important candidates. */
244 bitmap important_candidates
;
246 /* The maximum invariant id. */
249 /* Whether to consider just related and important candidates when replacing a
251 bool consider_all_candidates
;
253 /* Are we optimizing for speed? */
257 /* An assignment of iv candidates to uses. */
261 /* The number of uses covered by the assignment. */
264 /* Number of uses that cannot be expressed by the candidates in the set. */
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair
**cand_for_use
;
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses
;
273 /* The candidates used. */
276 /* The number of candidates in the set. */
279 /* Total number of registers needed. */
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost
;
285 /* Total cost of candidates. */
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses
;
291 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
312 /* Bound on number of candidates below that all candidates are considered. */
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
323 /* If there are at most this number of ivs in the set, try removing unnecessary
324 ivs from the set always. */
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
329 /* The list of trees for that the decl_rtl field must be reset is stored
332 static VEC(tree
,heap
) *decl_rtl_to_reset
;
334 /* Number of uses recorded in DATA. */
336 static inline unsigned
337 n_iv_uses (struct ivopts_data
*data
)
339 return VEC_length (iv_use_p
, data
->iv_uses
);
342 /* Ith use recorded in DATA. */
344 static inline struct iv_use
*
345 iv_use (struct ivopts_data
*data
, unsigned i
)
347 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
350 /* Number of candidates recorded in DATA. */
352 static inline unsigned
353 n_iv_cands (struct ivopts_data
*data
)
355 return VEC_length (iv_cand_p
, data
->iv_candidates
);
358 /* Ith candidate recorded in DATA. */
360 static inline struct iv_cand
*
361 iv_cand (struct ivopts_data
*data
, unsigned i
)
363 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
369 single_dom_exit (struct loop
*loop
)
371 edge exit
= single_exit (loop
);
376 if (!just_once_each_iteration_p (loop
, exit
->src
))
382 /* Dumps information about the induction variable IV to FILE. */
384 extern void dump_iv (FILE *, struct iv
*);
386 dump_iv (FILE *file
, struct iv
*iv
)
390 fprintf (file
, "ssa name ");
391 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
392 fprintf (file
, "\n");
395 fprintf (file
, " type ");
396 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
397 fprintf (file
, "\n");
401 fprintf (file
, " base ");
402 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
403 fprintf (file
, "\n");
405 fprintf (file
, " step ");
406 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
407 fprintf (file
, "\n");
411 fprintf (file
, " invariant ");
412 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
413 fprintf (file
, "\n");
418 fprintf (file
, " base object ");
419 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
420 fprintf (file
, "\n");
424 fprintf (file
, " is a biv\n");
427 /* Dumps information about the USE to FILE. */
429 extern void dump_use (FILE *, struct iv_use
*);
431 dump_use (FILE *file
, struct iv_use
*use
)
433 fprintf (file
, "use %d\n", use
->id
);
437 case USE_NONLINEAR_EXPR
:
438 fprintf (file
, " generic\n");
442 fprintf (file
, " address\n");
446 fprintf (file
, " compare\n");
453 fprintf (file
, " in statement ");
454 print_gimple_stmt (file
, use
->stmt
, 0, 0);
455 fprintf (file
, "\n");
457 fprintf (file
, " at position ");
459 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
460 fprintf (file
, "\n");
462 dump_iv (file
, use
->iv
);
464 if (use
->related_cands
)
466 fprintf (file
, " related candidates ");
467 dump_bitmap (file
, use
->related_cands
);
471 /* Dumps information about the uses to FILE. */
473 extern void dump_uses (FILE *, struct ivopts_data
*);
475 dump_uses (FILE *file
, struct ivopts_data
*data
)
480 for (i
= 0; i
< n_iv_uses (data
); i
++)
482 use
= iv_use (data
, i
);
484 dump_use (file
, use
);
485 fprintf (file
, "\n");
489 /* Dumps information about induction variable candidate CAND to FILE. */
491 extern void dump_cand (FILE *, struct iv_cand
*);
493 dump_cand (FILE *file
, struct iv_cand
*cand
)
495 struct iv
*iv
= cand
->iv
;
497 fprintf (file
, "candidate %d%s\n",
498 cand
->id
, cand
->important
? " (important)" : "");
500 if (cand
->depends_on
)
502 fprintf (file
, " depends on ");
503 dump_bitmap (file
, cand
->depends_on
);
508 fprintf (file
, " final value replacement\n");
515 fprintf (file
, " incremented before exit test\n");
519 fprintf (file
, " incremented at end\n");
523 fprintf (file
, " original biv\n");
530 /* Returns the info for ssa version VER. */
532 static inline struct version_info
*
533 ver_info (struct ivopts_data
*data
, unsigned ver
)
535 return data
->version_info
+ ver
;
538 /* Returns the info for ssa name NAME. */
540 static inline struct version_info
*
541 name_info (struct ivopts_data
*data
, tree name
)
543 return ver_info (data
, SSA_NAME_VERSION (name
));
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
550 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
552 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
556 if (sbb
== loop
->latch
)
562 return stmt
== last_stmt (bb
);
565 /* Returns true if STMT if after the place where the original induction
566 variable CAND is incremented. */
569 stmt_after_ip_original_pos (struct iv_cand
*cand
, gimple stmt
)
571 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
572 basic_block stmt_bb
= gimple_bb (stmt
);
573 gimple_stmt_iterator bsi
;
575 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
578 if (stmt_bb
!= cand_bb
)
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi
= gsi_last_bb (stmt_bb
); ; gsi_prev (&bsi
))
585 if (gsi_stmt (bsi
) == cand
->incremented_at
)
587 if (gsi_stmt (bsi
) == stmt
)
592 /* Returns true if STMT if after the place where the induction variable
593 CAND is incremented in LOOP. */
596 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
604 return stmt_after_ip_normal_pos (loop
, stmt
);
607 return stmt_after_ip_original_pos (cand
, stmt
);
614 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
617 abnormal_ssa_name_p (tree exp
)
622 if (TREE_CODE (exp
) != SSA_NAME
)
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
628 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
632 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
633 void *data ATTRIBUTE_UNUSED
)
635 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
637 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
639 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
643 return !abnormal_ssa_name_p (*index
);
646 /* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
650 contains_abnormal_ssa_name_p (tree expr
)
653 enum tree_code_class codeclass
;
658 code
= TREE_CODE (expr
);
659 codeclass
= TREE_CODE_CLASS (code
);
661 if (code
== SSA_NAME
)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
664 if (code
== INTEGER_CST
665 || is_gimple_min_invariant (expr
))
668 if (code
== ADDR_EXPR
)
669 return !for_each_index (&TREE_OPERAND (expr
, 0),
670 idx_contains_abnormal_ssa_name_p
,
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
694 /* Returns tree describing number of iterations determined from
695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
698 niter_for_exit (struct ivopts_data
*data
, edge exit
)
700 struct tree_niter_desc desc
;
706 data
->niters
= pointer_map_create ();
710 slot
= pointer_map_contains (data
->niters
, exit
);
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data
->current_loop
,
721 && integer_zerop (desc
.may_be_zero
)
722 && !contains_abnormal_ssa_name_p (desc
.niter
))
727 *pointer_map_insert (data
->niters
, exit
) = niter
;
730 niter
= (tree
) *slot
;
735 /* Returns tree describing number of iterations determined from
736 single dominating exit of DATA->current_loop, or NULL if something
740 niter_for_single_dom_exit (struct ivopts_data
*data
)
742 edge exit
= single_dom_exit (data
->current_loop
);
747 return niter_for_exit (data
, exit
);
750 /* Initializes data structures used by the iv optimization pass, stored
754 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
756 data
->version_info_size
= 2 * num_ssa_names
;
757 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
758 data
->relevant
= BITMAP_ALLOC (NULL
);
759 data
->important_candidates
= BITMAP_ALLOC (NULL
);
760 data
->max_inv_id
= 0;
762 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
763 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
764 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr
)
773 enum tree_code code
= TREE_CODE (expr
);
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr
))
780 return determine_base_object (TREE_OPERAND (expr
, 0));
782 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
791 obj
= TREE_OPERAND (expr
, 0);
792 base
= get_base_address (obj
);
797 if (TREE_CODE (base
) == INDIRECT_REF
)
798 return determine_base_object (TREE_OPERAND (base
, 0));
800 return fold_convert (ptr_type_node
,
801 build_fold_addr_expr (base
));
803 case POINTER_PLUS_EXPR
:
804 return determine_base_object (TREE_OPERAND (expr
, 0));
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
812 return fold_convert (ptr_type_node
, expr
);
816 /* Allocates an induction variable with given initial value BASE and step STEP
820 alloc_iv (tree base
, tree step
)
822 struct iv
*iv
= XCNEW (struct iv
);
823 gcc_assert (step
!= NULL_TREE
);
826 iv
->base_object
= determine_base_object (base
);
829 iv
->have_use_for
= false;
831 iv
->ssa_name
= NULL_TREE
;
836 /* Sets STEP and BASE for induction variable IV. */
839 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
841 struct version_info
*info
= name_info (data
, iv
);
843 gcc_assert (!info
->iv
);
845 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
846 info
->iv
= alloc_iv (base
, step
);
847 info
->iv
->ssa_name
= iv
;
850 /* Finds induction variable declaration for VAR. */
853 get_iv (struct ivopts_data
*data
, tree var
)
856 tree type
= TREE_TYPE (var
);
858 if (!POINTER_TYPE_P (type
)
859 && !INTEGRAL_TYPE_P (type
))
862 if (!name_info (data
, var
)->iv
)
864 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
867 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
868 set_iv (data
, var
, var
, build_int_cst (type
, 0));
871 return name_info (data
, var
)->iv
;
874 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
878 determine_biv_step (gimple phi
)
880 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
881 tree name
= PHI_RESULT (phi
);
884 if (!is_gimple_reg (name
))
887 if (!simple_iv (loop
, loop
, name
, &iv
, true))
890 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
893 /* Finds basic ivs. */
896 find_bivs (struct ivopts_data
*data
)
899 tree step
, type
, base
;
901 struct loop
*loop
= data
->current_loop
;
902 gimple_stmt_iterator psi
;
904 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
906 phi
= gsi_stmt (psi
);
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
911 step
= determine_biv_step (phi
);
915 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
916 base
= expand_simple_operations (base
);
917 if (contains_abnormal_ssa_name_p (base
)
918 || contains_abnormal_ssa_name_p (step
))
921 type
= TREE_TYPE (PHI_RESULT (phi
));
922 base
= fold_convert (type
, base
);
925 if (POINTER_TYPE_P (type
))
926 step
= fold_convert (sizetype
, step
);
928 step
= fold_convert (type
, step
);
931 set_iv (data
, PHI_RESULT (phi
), base
, step
);
938 /* Marks basic ivs. */
941 mark_bivs (struct ivopts_data
*data
)
945 struct iv
*iv
, *incr_iv
;
946 struct loop
*loop
= data
->current_loop
;
948 gimple_stmt_iterator psi
;
950 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
952 phi
= gsi_stmt (psi
);
954 iv
= get_iv (data
, PHI_RESULT (phi
));
958 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
959 incr_iv
= get_iv (data
, var
);
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
965 if (incr_bb
->loop_father
!= data
->current_loop
966 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
970 incr_iv
->biv_p
= true;
974 /* Checks whether STMT defines a linear induction variable and stores its
978 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
981 struct loop
*loop
= data
->current_loop
;
983 iv
->base
= NULL_TREE
;
984 iv
->step
= NULL_TREE
;
986 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
989 lhs
= gimple_assign_lhs (stmt
);
990 if (TREE_CODE (lhs
) != SSA_NAME
)
993 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
995 iv
->base
= expand_simple_operations (iv
->base
);
997 if (contains_abnormal_ssa_name_p (iv
->base
)
998 || contains_abnormal_ssa_name_p (iv
->step
))
1004 /* Finds general ivs in statement STMT. */
1007 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1011 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1014 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1017 /* Finds general ivs in basic block BB. */
1020 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1022 gimple_stmt_iterator bsi
;
1024 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1025 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1028 /* Finds general ivs. */
1031 find_givs (struct ivopts_data
*data
)
1033 struct loop
*loop
= data
->current_loop
;
1034 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1037 for (i
= 0; i
< loop
->num_nodes
; i
++)
1038 find_givs_in_bb (data
, body
[i
]);
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1046 find_induction_variables (struct ivopts_data
*data
)
1051 if (!find_bivs (data
))
1057 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1059 tree niter
= niter_for_single_dom_exit (data
);
1063 fprintf (dump_file
, " number of iterations ");
1064 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1065 fprintf (dump_file
, "\n\n");
1068 fprintf (dump_file
, "Induction variables:\n\n");
1070 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1072 if (ver_info (data
, i
)->iv
)
1073 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1082 static struct iv_use
*
1083 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1084 gimple stmt
, enum use_type use_type
)
1086 struct iv_use
*use
= XCNEW (struct iv_use
);
1088 use
->id
= n_iv_uses (data
);
1089 use
->type
= use_type
;
1093 use
->related_cands
= BITMAP_ALLOC (NULL
);
1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1097 iv
->ssa_name
= NULL_TREE
;
1099 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1100 dump_use (dump_file
, use
);
1102 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1112 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1115 struct version_info
*info
;
1117 if (TREE_CODE (op
) != SSA_NAME
1118 || !is_gimple_reg (op
))
1121 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1123 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1126 info
= name_info (data
, op
);
1128 info
->has_nonlin_use
|= nonlinear_use
;
1130 info
->inv_id
= ++data
->max_inv_id
;
1131 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1134 /* Checks whether the use OP is interesting and if so, records it. */
1136 static struct iv_use
*
1137 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1144 if (TREE_CODE (op
) != SSA_NAME
)
1147 iv
= get_iv (data
, op
);
1151 if (iv
->have_use_for
)
1153 use
= iv_use (data
, iv
->use_id
);
1155 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1159 if (integer_zerop (iv
->step
))
1161 record_invariant (data
, op
, true);
1164 iv
->have_use_for
= true;
1166 civ
= XNEW (struct iv
);
1169 stmt
= SSA_NAME_DEF_STMT (op
);
1170 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1171 || is_gimple_assign (stmt
));
1173 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1174 iv
->use_id
= use
->id
;
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1188 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1189 tree
**control_var
, tree
**bound
,
1190 struct iv
**iv_var
, struct iv
**iv_bound
)
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv
;
1195 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1196 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1199 if (gimple_code (stmt
) == GIMPLE_COND
)
1201 op0
= gimple_cond_lhs_ptr (stmt
);
1202 op1
= gimple_cond_rhs_ptr (stmt
);
1206 op0
= gimple_assign_rhs1_ptr (stmt
);
1207 op1
= gimple_assign_rhs2_ptr (stmt
);
1210 zero
= integer_zero_node
;
1211 const_iv
.step
= integer_zero_node
;
1213 if (TREE_CODE (*op0
) == SSA_NAME
)
1214 iv0
= get_iv (data
, *op0
);
1215 if (TREE_CODE (*op1
) == SSA_NAME
)
1216 iv1
= get_iv (data
, *op1
);
1218 /* Exactly one of the compared values must be an iv, and the other one must
1223 if (integer_zerop (iv0
->step
))
1225 /* Control variable may be on the other side. */
1226 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1227 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1229 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1233 *control_var
= op0
;;
1244 /* Checks whether the condition in STMT is interesting and if so,
1248 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1250 tree
*var_p
, *bound_p
;
1251 struct iv
*var_iv
, *civ
;
1253 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1255 find_interesting_uses_op (data
, *var_p
);
1256 find_interesting_uses_op (data
, *bound_p
);
1260 civ
= XNEW (struct iv
);
1262 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1270 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1275 gcc_assert (loop_depth (loop
) > 0);
1277 if (is_gimple_min_invariant (expr
))
1280 if (TREE_CODE (expr
) == SSA_NAME
)
1282 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1284 && flow_bb_inside_loop_p (loop
, def_bb
))
1293 len
= TREE_OPERAND_LENGTH (expr
);
1294 for (i
= 0; i
< len
; i
++)
1295 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1306 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1311 gcc_assert (loop_depth (loop
) > 0);
1313 lhs
= gimple_get_lhs (stmt
);
1314 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1316 tree op
= gimple_op (stmt
, i
);
1317 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1324 /* Cumulates the steps of indices into DATA and replaces their values with the
1325 initial ones. Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1328 struct ifs_ivopts_data
1330 struct ivopts_data
*ivopts_data
;
1336 idx_find_step (tree base
, tree
*idx
, void *data
)
1338 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1340 tree step
, iv_base
, iv_step
, lbound
, off
;
1341 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1343 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1347 /* If base is a component ref, require that the offset of the reference
1349 if (TREE_CODE (base
) == COMPONENT_REF
)
1351 off
= component_ref_field_offset (base
);
1352 return expr_invariant_in_loop_p (loop
, off
);
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
1359 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1361 /* Moreover, for a range, the size needs to be invariant as well. */
1362 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1363 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1366 step
= array_ref_element_size (base
);
1367 lbound
= array_ref_low_bound (base
);
1369 if (!expr_invariant_in_loop_p (loop
, step
)
1370 || !expr_invariant_in_loop_p (loop
, lbound
))
1374 if (TREE_CODE (*idx
) != SSA_NAME
)
1377 iv
= get_iv (dta
->ivopts_data
, *idx
);
1381 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1382 *&x[0], which is not folded and does not trigger the
1383 ARRAY_REF path below. */
1386 if (integer_zerop (iv
->step
))
1389 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1391 step
= array_ref_element_size (base
);
1393 /* We only handle addresses whose step is an integer constant. */
1394 if (TREE_CODE (step
) != INTEGER_CST
)
1398 /* The step for pointer arithmetics already is 1 byte. */
1399 step
= build_int_cst (sizetype
, 1);
1403 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1404 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1407 /* The index might wrap. */
1411 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1412 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base
, tree
*idx
,
1424 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1425 find_interesting_uses_op (data
, *idx
);
1426 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1428 find_interesting_uses_op (data
, array_ref_element_size (base
));
1429 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1434 /* If we can prove that TOP = cst * BOT for some constant cst,
1435 store cst to MUL and return true. Otherwise return false.
1436 The returned value is always sign-extended, regardless of the
1437 signedness of TOP and BOT. */
1440 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1443 enum tree_code code
;
1444 double_int res
, p0
, p1
;
1445 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1450 if (operand_equal_p (top
, bot
, 0))
1452 *mul
= double_int_one
;
1456 code
= TREE_CODE (top
);
1460 mby
= TREE_OPERAND (top
, 1);
1461 if (TREE_CODE (mby
) != INTEGER_CST
)
1464 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1467 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1473 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1474 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1477 if (code
== MINUS_EXPR
)
1478 p1
= double_int_neg (p1
);
1479 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1483 if (TREE_CODE (bot
) != INTEGER_CST
)
1486 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1487 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1488 if (double_int_zero_p (p1
))
1490 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1492 return double_int_zero_p (res
);
1499 /* Returns true if memory reference REF with step STEP may be unaligned. */
1502 may_be_unaligned_p (tree ref
, tree step
)
1506 HOST_WIDE_INT bitsize
;
1507 HOST_WIDE_INT bitpos
;
1509 enum machine_mode mode
;
1510 int unsignedp
, volatilep
;
1511 unsigned base_align
;
1513 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1514 thus they are not misaligned. */
1515 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1518 /* The test below is basically copy of what expr.c:normal_inner_ref
1519 does to check whether the object must be loaded by parts when
1520 STRICT_ALIGNMENT is true. */
1521 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1522 &unsignedp
, &volatilep
, true);
1523 base_type
= TREE_TYPE (base
);
1524 base_align
= TYPE_ALIGN (base_type
);
1526 if (mode
!= BLKmode
)
1529 tree al
= build_int_cst (TREE_TYPE (step
),
1530 GET_MODE_ALIGNMENT (mode
) / BITS_PER_UNIT
);
1532 if (base_align
< GET_MODE_ALIGNMENT (mode
)
1533 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1534 || bitpos
% BITS_PER_UNIT
!= 0)
1537 if (!constant_multiple_of (step
, al
, &mul
))
1544 /* Return true if EXPR may be non-addressable. */
1547 may_be_nonaddressable_p (tree expr
)
1549 switch (TREE_CODE (expr
))
1551 case TARGET_MEM_REF
:
1552 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1553 target, thus they are always addressable. */
1557 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1558 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1560 case VIEW_CONVERT_EXPR
:
1561 /* This kind of view-conversions may wrap non-addressable objects
1562 and make them look addressable. After some processing the
1563 non-addressability may be uncovered again, causing ADDR_EXPRs
1564 of inappropriate objects to be built. */
1565 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1566 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1569 /* ... fall through ... */
1572 case ARRAY_RANGE_REF
:
1573 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1585 /* Finds addresses in *OP_P inside STMT. */
1588 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1590 tree base
= *op_p
, step
= build_int_cst (sizetype
, 0);
1592 struct ifs_ivopts_data ifs_ivopts_data
;
1594 /* Do not play with volatile memory references. A bit too conservative,
1595 perhaps, but safe. */
1596 if (gimple_has_volatile_ops (stmt
))
1599 /* Ignore bitfields for now. Not really something terribly complicated
1601 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1604 base
= unshare_expr (base
);
1606 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1608 tree type
= build_pointer_type (TREE_TYPE (base
));
1612 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1614 civ
= get_iv (data
, TMR_BASE (base
));
1618 TMR_BASE (base
) = civ
->base
;
1621 if (TMR_INDEX (base
)
1622 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1624 civ
= get_iv (data
, TMR_INDEX (base
));
1628 TMR_INDEX (base
) = civ
->base
;
1633 if (TMR_STEP (base
))
1634 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1636 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1640 if (integer_zerop (step
))
1642 base
= tree_mem_ref_addr (type
, base
);
1646 ifs_ivopts_data
.ivopts_data
= data
;
1647 ifs_ivopts_data
.stmt
= stmt
;
1648 ifs_ivopts_data
.step
= build_int_cst (sizetype
, 0);
1649 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1650 || integer_zerop (ifs_ivopts_data
.step
))
1652 step
= ifs_ivopts_data
.step
;
1654 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1655 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1657 /* Check that the base expression is addressable. This needs
1658 to be done after substituting bases of IVs into it. */
1659 if (may_be_nonaddressable_p (base
))
1662 /* Moreover, on strict alignment platforms, check that it is
1663 sufficiently aligned. */
1664 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1667 base
= build_fold_addr_expr (base
);
1669 /* Substituting bases of IVs into the base expression might
1670 have caused folding opportunities. */
1671 if (TREE_CODE (base
) == ADDR_EXPR
)
1673 tree
*ref
= &TREE_OPERAND (base
, 0);
1674 while (handled_component_p (*ref
))
1675 ref
= &TREE_OPERAND (*ref
, 0);
1676 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1677 *ref
= fold_indirect_ref (*ref
);
1681 civ
= alloc_iv (base
, step
);
1682 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1686 for_each_index (op_p
, idx_record_use
, data
);
1689 /* Finds and records invariants used in STMT. */
1692 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1695 use_operand_p use_p
;
1698 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1700 op
= USE_FROM_PTR (use_p
);
1701 record_invariant (data
, op
, false);
1705 /* Finds interesting uses of induction variables in the statement STMT. */
1708 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1711 tree op
, *lhs
, *rhs
;
1713 use_operand_p use_p
;
1714 enum tree_code code
;
1716 find_invariants_stmt (data
, stmt
);
1718 if (gimple_code (stmt
) == GIMPLE_COND
)
1720 find_interesting_uses_cond (data
, stmt
);
1724 if (is_gimple_assign (stmt
))
1726 lhs
= gimple_assign_lhs_ptr (stmt
);
1727 rhs
= gimple_assign_rhs1_ptr (stmt
);
1729 if (TREE_CODE (*lhs
) == SSA_NAME
)
1731 /* If the statement defines an induction variable, the uses are not
1732 interesting by themselves. */
1734 iv
= get_iv (data
, *lhs
);
1736 if (iv
&& !integer_zerop (iv
->step
))
1740 code
= gimple_assign_rhs_code (stmt
);
1741 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1742 && (REFERENCE_CLASS_P (*rhs
)
1743 || is_gimple_val (*rhs
)))
1745 if (REFERENCE_CLASS_P (*rhs
))
1746 find_interesting_uses_address (data
, stmt
, rhs
);
1748 find_interesting_uses_op (data
, *rhs
);
1750 if (REFERENCE_CLASS_P (*lhs
))
1751 find_interesting_uses_address (data
, stmt
, lhs
);
1754 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1756 find_interesting_uses_cond (data
, stmt
);
1760 /* TODO -- we should also handle address uses of type
1762 memory = call (whatever);
1769 if (gimple_code (stmt
) == GIMPLE_PHI
1770 && gimple_bb (stmt
) == data
->current_loop
->header
)
1772 iv
= get_iv (data
, PHI_RESULT (stmt
));
1774 if (iv
&& !integer_zerop (iv
->step
))
1778 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1780 op
= USE_FROM_PTR (use_p
);
1782 if (TREE_CODE (op
) != SSA_NAME
)
1785 iv
= get_iv (data
, op
);
1789 find_interesting_uses_op (data
, op
);
1793 /* Finds interesting uses of induction variables outside of loops
1794 on loop exit edge EXIT. */
1797 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1800 gimple_stmt_iterator psi
;
1803 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1805 phi
= gsi_stmt (psi
);
1806 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1807 if (is_gimple_reg (def
))
1808 find_interesting_uses_op (data
, def
);
1812 /* Finds uses of the induction variables that are interesting. */
1815 find_interesting_uses (struct ivopts_data
*data
)
1818 gimple_stmt_iterator bsi
;
1819 basic_block
*body
= get_loop_body (data
->current_loop
);
1821 struct version_info
*info
;
1824 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1825 fprintf (dump_file
, "Uses:\n\n");
1827 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1832 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1833 if (e
->dest
!= EXIT_BLOCK_PTR
1834 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1835 find_interesting_uses_outside (data
, e
);
1837 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1838 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1839 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1840 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1843 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1847 fprintf (dump_file
, "\n");
1849 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1851 info
= ver_info (data
, i
);
1854 fprintf (dump_file
, " ");
1855 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1856 fprintf (dump_file
, " is invariant (%d)%s\n",
1857 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1861 fprintf (dump_file
, "\n");
1867 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1868 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1869 we are at the top-level of the processed address. */
1872 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1873 unsigned HOST_WIDE_INT
*offset
)
1875 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1876 enum tree_code code
;
1877 tree type
, orig_type
= TREE_TYPE (expr
);
1878 unsigned HOST_WIDE_INT off0
, off1
, st
;
1879 tree orig_expr
= expr
;
1883 type
= TREE_TYPE (expr
);
1884 code
= TREE_CODE (expr
);
1890 if (!cst_and_fits_in_hwi (expr
)
1891 || integer_zerop (expr
))
1894 *offset
= int_cst_value (expr
);
1895 return build_int_cst (orig_type
, 0);
1897 case POINTER_PLUS_EXPR
:
1900 op0
= TREE_OPERAND (expr
, 0);
1901 op1
= TREE_OPERAND (expr
, 1);
1903 op0
= strip_offset_1 (op0
, false, false, &off0
);
1904 op1
= strip_offset_1 (op1
, false, false, &off1
);
1906 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
1907 if (op0
== TREE_OPERAND (expr
, 0)
1908 && op1
== TREE_OPERAND (expr
, 1))
1911 if (integer_zerop (op1
))
1913 else if (integer_zerop (op0
))
1915 if (code
== MINUS_EXPR
)
1916 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1921 expr
= fold_build2 (code
, type
, op0
, op1
);
1923 return fold_convert (orig_type
, expr
);
1926 op1
= TREE_OPERAND (expr
, 1);
1927 if (!cst_and_fits_in_hwi (op1
))
1930 op0
= TREE_OPERAND (expr
, 0);
1931 op0
= strip_offset_1 (op0
, false, false, &off0
);
1932 if (op0
== TREE_OPERAND (expr
, 0))
1935 *offset
= off0
* int_cst_value (op1
);
1936 if (integer_zerop (op0
))
1939 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
1941 return fold_convert (orig_type
, expr
);
1944 case ARRAY_RANGE_REF
:
1948 step
= array_ref_element_size (expr
);
1949 if (!cst_and_fits_in_hwi (step
))
1952 st
= int_cst_value (step
);
1953 op1
= TREE_OPERAND (expr
, 1);
1954 op1
= strip_offset_1 (op1
, false, false, &off1
);
1955 *offset
= off1
* st
;
1958 && integer_zerop (op1
))
1960 /* Strip the component reference completely. */
1961 op0
= TREE_OPERAND (expr
, 0);
1962 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1972 tmp
= component_ref_field_offset (expr
);
1974 && cst_and_fits_in_hwi (tmp
))
1976 /* Strip the component reference completely. */
1977 op0
= TREE_OPERAND (expr
, 0);
1978 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1979 *offset
= off0
+ int_cst_value (tmp
);
1985 op0
= TREE_OPERAND (expr
, 0);
1986 op0
= strip_offset_1 (op0
, true, true, &off0
);
1989 if (op0
== TREE_OPERAND (expr
, 0))
1992 expr
= build_fold_addr_expr (op0
);
1993 return fold_convert (orig_type
, expr
);
1996 inside_addr
= false;
2003 /* Default handling of expressions for that we want to recurse into
2004 the first operand. */
2005 op0
= TREE_OPERAND (expr
, 0);
2006 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2009 if (op0
== TREE_OPERAND (expr
, 0)
2010 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2013 expr
= copy_node (expr
);
2014 TREE_OPERAND (expr
, 0) = op0
;
2016 TREE_OPERAND (expr
, 1) = op1
;
2018 /* Inside address, we might strip the top level component references,
2019 thus changing type of the expression. Handling of ADDR_EXPR
2021 expr
= fold_convert (orig_type
, expr
);
2026 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2029 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2031 return strip_offset_1 (expr
, false, false, offset
);
2034 /* Returns variant of TYPE that can be used as base for different uses.
2035 We return unsigned type with the same precision, which avoids problems
2039 generic_type_for (tree type
)
2041 if (POINTER_TYPE_P (type
))
2042 return unsigned_type_for (type
);
2044 if (TYPE_UNSIGNED (type
))
2047 return unsigned_type_for (type
);
2050 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2051 the bitmap to that we should store it. */
2053 static struct ivopts_data
*fd_ivopts_data
;
2055 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2057 bitmap
*depends_on
= (bitmap
*) data
;
2058 struct version_info
*info
;
2060 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2062 info
= name_info (fd_ivopts_data
, *expr_p
);
2064 if (!info
->inv_id
|| info
->has_nonlin_use
)
2068 *depends_on
= BITMAP_ALLOC (NULL
);
2069 bitmap_set_bit (*depends_on
, info
->inv_id
);
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2077 replacement of the final value of the iv by a direct computation. */
2079 static struct iv_cand
*
2080 add_candidate_1 (struct ivopts_data
*data
,
2081 tree base
, tree step
, bool important
, enum iv_position pos
,
2082 struct iv_use
*use
, gimple incremented_at
)
2085 struct iv_cand
*cand
= NULL
;
2086 tree type
, orig_type
;
2090 orig_type
= TREE_TYPE (base
);
2091 type
= generic_type_for (orig_type
);
2092 if (type
!= orig_type
)
2094 base
= fold_convert (type
, base
);
2095 step
= fold_convert (type
, step
);
2099 for (i
= 0; i
< n_iv_cands (data
); i
++)
2101 cand
= iv_cand (data
, i
);
2103 if (cand
->pos
!= pos
)
2106 if (cand
->incremented_at
!= incremented_at
)
2120 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2121 && operand_equal_p (step
, cand
->iv
->step
, 0))
2125 if (i
== n_iv_cands (data
))
2127 cand
= XCNEW (struct iv_cand
);
2133 cand
->iv
= alloc_iv (base
, step
);
2136 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2138 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2139 cand
->var_after
= cand
->var_before
;
2141 cand
->important
= important
;
2142 cand
->incremented_at
= incremented_at
;
2143 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2146 && TREE_CODE (step
) != INTEGER_CST
)
2148 fd_ivopts_data
= data
;
2149 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2153 dump_cand (dump_file
, cand
);
2156 if (important
&& !cand
->important
)
2158 cand
->important
= true;
2159 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2160 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2165 bitmap_set_bit (use
->related_cands
, i
);
2166 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2167 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2174 /* Returns true if incrementing the induction variable at the end of the LOOP
2177 The purpose is to avoid splitting latch edge with a biv increment, thus
2178 creating a jump, possibly confusing other optimization passes and leaving
2179 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2180 is not available (so we do not have a better alternative), or if the latch
2181 edge is already nonempty. */
2184 allow_ip_end_pos_p (struct loop
*loop
)
2186 if (!ip_normal_pos (loop
))
2189 if (!empty_block_p (ip_end_pos (loop
)))
2195 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2196 position to POS. If USE is not NULL, the candidate is set as related to
2197 it. The candidate computation is scheduled on all available positions. */
2200 add_candidate (struct ivopts_data
*data
,
2201 tree base
, tree step
, bool important
, struct iv_use
*use
)
2203 if (ip_normal_pos (data
->current_loop
))
2204 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2205 if (ip_end_pos (data
->current_loop
)
2206 && allow_ip_end_pos_p (data
->current_loop
))
2207 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2210 /* Add a standard "0 + 1 * iteration" iv candidate for a
2211 type with SIZE bits. */
2214 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2217 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2218 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2222 /* Adds standard iv candidates. */
2225 add_standard_iv_candidates (struct ivopts_data
*data
)
2227 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2229 /* The same for a double-integer type if it is still fast enough. */
2230 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2231 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2235 /* Adds candidates bases on the old induction variable IV. */
2238 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2242 struct iv_cand
*cand
;
2244 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2246 /* The same, but with initial value zero. */
2247 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2248 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2250 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2251 iv
->step
, true, NULL
);
2253 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2254 if (gimple_code (phi
) == GIMPLE_PHI
)
2256 /* Additionally record the possibility of leaving the original iv
2258 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2259 cand
= add_candidate_1 (data
,
2260 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2261 SSA_NAME_DEF_STMT (def
));
2262 cand
->var_before
= iv
->ssa_name
;
2263 cand
->var_after
= def
;
2267 /* Adds candidates based on the old induction variables. */
2270 add_old_ivs_candidates (struct ivopts_data
*data
)
2276 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2278 iv
= ver_info (data
, i
)->iv
;
2279 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2280 add_old_iv_candidates (data
, iv
);
2284 /* Adds candidates based on the value of the induction variable IV and USE. */
2287 add_iv_value_candidates (struct ivopts_data
*data
,
2288 struct iv
*iv
, struct iv_use
*use
)
2290 unsigned HOST_WIDE_INT offset
;
2294 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2296 /* The same, but with initial value zero. Make such variable important,
2297 since it is generic enough so that possibly many uses may be based
2299 basetype
= TREE_TYPE (iv
->base
);
2300 if (POINTER_TYPE_P (basetype
))
2301 basetype
= sizetype
;
2302 add_candidate (data
, build_int_cst (basetype
, 0),
2303 iv
->step
, true, use
);
2305 /* Third, try removing the constant offset. Make sure to even
2306 add a candidate for &a[0] vs. (T *)&a. */
2307 base
= strip_offset (iv
->base
, &offset
);
2309 || base
!= iv
->base
)
2310 add_candidate (data
, base
, iv
->step
, false, use
);
2313 /* Adds candidates based on the uses. */
2316 add_derived_ivs_candidates (struct ivopts_data
*data
)
2320 for (i
= 0; i
< n_iv_uses (data
); i
++)
2322 struct iv_use
*use
= iv_use (data
, i
);
2329 case USE_NONLINEAR_EXPR
:
2332 /* Just add the ivs based on the value of the iv used here. */
2333 add_iv_value_candidates (data
, use
->iv
, use
);
2342 /* Record important candidates and add them to related_cands bitmaps
2346 record_important_candidates (struct ivopts_data
*data
)
2351 for (i
= 0; i
< n_iv_cands (data
); i
++)
2353 struct iv_cand
*cand
= iv_cand (data
, i
);
2355 if (cand
->important
)
2356 bitmap_set_bit (data
->important_candidates
, i
);
2359 data
->consider_all_candidates
= (n_iv_cands (data
)
2360 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2362 if (data
->consider_all_candidates
)
2364 /* We will not need "related_cands" bitmaps in this case,
2365 so release them to decrease peak memory consumption. */
2366 for (i
= 0; i
< n_iv_uses (data
); i
++)
2368 use
= iv_use (data
, i
);
2369 BITMAP_FREE (use
->related_cands
);
2374 /* Add important candidates to the related_cands bitmaps. */
2375 for (i
= 0; i
< n_iv_uses (data
); i
++)
2376 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2377 data
->important_candidates
);
2381 /* Finds the candidates for the induction variables. */
2384 find_iv_candidates (struct ivopts_data
*data
)
2386 /* Add commonly used ivs. */
2387 add_standard_iv_candidates (data
);
2389 /* Add old induction variables. */
2390 add_old_ivs_candidates (data
);
2392 /* Add induction variables derived from uses. */
2393 add_derived_ivs_candidates (data
);
2395 /* Record the important candidates. */
2396 record_important_candidates (data
);
2399 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2400 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2401 we allocate a simple list to every use. */
2404 alloc_use_cost_map (struct ivopts_data
*data
)
2406 unsigned i
, size
, s
, j
;
2408 for (i
= 0; i
< n_iv_uses (data
); i
++)
2410 struct iv_use
*use
= iv_use (data
, i
);
2413 if (data
->consider_all_candidates
)
2414 size
= n_iv_cands (data
);
2418 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2423 /* Round up to the power of two, so that moduling by it is fast. */
2424 for (size
= 1; size
< s
; size
<<= 1)
2428 use
->n_map_members
= size
;
2429 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2433 /* Returns description of computation cost of expression whose runtime
2434 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2437 new_cost (unsigned runtime
, unsigned complexity
)
2441 cost
.cost
= runtime
;
2442 cost
.complexity
= complexity
;
2447 /* Adds costs COST1 and COST2. */
2450 add_costs (comp_cost cost1
, comp_cost cost2
)
2452 cost1
.cost
+= cost2
.cost
;
2453 cost1
.complexity
+= cost2
.complexity
;
2457 /* Subtracts costs COST1 and COST2. */
2460 sub_costs (comp_cost cost1
, comp_cost cost2
)
2462 cost1
.cost
-= cost2
.cost
;
2463 cost1
.complexity
-= cost2
.complexity
;
2468 /* Returns a negative number if COST1 < COST2, a positive number if
2469 COST1 > COST2, and 0 if COST1 = COST2. */
2472 compare_costs (comp_cost cost1
, comp_cost cost2
)
2474 if (cost1
.cost
== cost2
.cost
)
2475 return cost1
.complexity
- cost2
.complexity
;
2477 return cost1
.cost
- cost2
.cost
;
2480 /* Returns true if COST is infinite. */
2483 infinite_cost_p (comp_cost cost
)
2485 return cost
.cost
== INFTY
;
2488 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2489 on invariants DEPENDS_ON and that the value used in expressing it
2493 set_use_iv_cost (struct ivopts_data
*data
,
2494 struct iv_use
*use
, struct iv_cand
*cand
,
2495 comp_cost cost
, bitmap depends_on
, tree value
)
2499 if (infinite_cost_p (cost
))
2501 BITMAP_FREE (depends_on
);
2505 if (data
->consider_all_candidates
)
2507 use
->cost_map
[cand
->id
].cand
= cand
;
2508 use
->cost_map
[cand
->id
].cost
= cost
;
2509 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2510 use
->cost_map
[cand
->id
].value
= value
;
2514 /* n_map_members is a power of two, so this computes modulo. */
2515 s
= cand
->id
& (use
->n_map_members
- 1);
2516 for (i
= s
; i
< use
->n_map_members
; i
++)
2517 if (!use
->cost_map
[i
].cand
)
2519 for (i
= 0; i
< s
; i
++)
2520 if (!use
->cost_map
[i
].cand
)
2526 use
->cost_map
[i
].cand
= cand
;
2527 use
->cost_map
[i
].cost
= cost
;
2528 use
->cost_map
[i
].depends_on
= depends_on
;
2529 use
->cost_map
[i
].value
= value
;
2532 /* Gets cost of (USE, CANDIDATE) pair. */
2534 static struct cost_pair
*
2535 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2536 struct iv_cand
*cand
)
2539 struct cost_pair
*ret
;
2544 if (data
->consider_all_candidates
)
2546 ret
= use
->cost_map
+ cand
->id
;
2553 /* n_map_members is a power of two, so this computes modulo. */
2554 s
= cand
->id
& (use
->n_map_members
- 1);
2555 for (i
= s
; i
< use
->n_map_members
; i
++)
2556 if (use
->cost_map
[i
].cand
== cand
)
2557 return use
->cost_map
+ i
;
2559 for (i
= 0; i
< s
; i
++)
2560 if (use
->cost_map
[i
].cand
== cand
)
2561 return use
->cost_map
+ i
;
2566 /* Returns estimate on cost of computing SEQ. */
2569 seq_cost (rtx seq
, bool speed
)
2574 for (; seq
; seq
= NEXT_INSN (seq
))
2576 set
= single_set (seq
);
2578 cost
+= rtx_cost (set
, SET
,speed
);
2586 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2588 produce_memory_decl_rtl (tree obj
, int *regno
)
2593 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2595 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2596 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2597 SET_SYMBOL_REF_DECL (x
, obj
);
2598 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2599 targetm
.encode_section_info (obj
, x
, true);
2603 x
= gen_raw_REG (Pmode
, (*regno
)++);
2604 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2610 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2611 walk_tree. DATA contains the actual fake register number. */
2614 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2616 tree obj
= NULL_TREE
;
2618 int *regno
= (int *) data
;
2620 switch (TREE_CODE (*expr_p
))
2623 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2624 handled_component_p (*expr_p
);
2625 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2628 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2629 x
= produce_memory_decl_rtl (obj
, regno
);
2634 obj
= SSA_NAME_VAR (*expr_p
);
2635 if (!DECL_RTL_SET_P (obj
))
2636 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2645 if (DECL_RTL_SET_P (obj
))
2648 if (DECL_MODE (obj
) == BLKmode
)
2649 x
= produce_memory_decl_rtl (obj
, regno
);
2651 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2661 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2662 SET_DECL_RTL (obj
, x
);
2668 /* Determines cost of the computation of EXPR. */
2671 computation_cost (tree expr
, bool speed
)
2674 tree type
= TREE_TYPE (expr
);
2676 /* Avoid using hard regs in ways which may be unsupported. */
2677 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2678 enum function_frequency real_frequency
= cfun
->function_frequency
;
2680 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
2681 crtl
->maybe_hot_insn_p
= speed
;
2682 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2684 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2687 default_rtl_profile ();
2688 cfun
->function_frequency
= real_frequency
;
2690 cost
= seq_cost (seq
, speed
);
2692 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
), speed
);
2697 /* Returns variable containing the value of candidate CAND at statement AT. */
2700 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2702 if (stmt_after_increment (loop
, cand
, stmt
))
2703 return cand
->var_after
;
2705 return cand
->var_before
;
2708 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2709 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2712 tree_int_cst_sign_bit (const_tree t
)
2714 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2715 unsigned HOST_WIDE_INT w
;
2717 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2718 w
= TREE_INT_CST_LOW (t
);
2721 w
= TREE_INT_CST_HIGH (t
);
2722 bitno
-= HOST_BITS_PER_WIDE_INT
;
2725 return (w
>> bitno
) & 1;
2728 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2729 same precision that is at least as wide as the precision of TYPE, stores
2730 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2734 determine_common_wider_type (tree
*a
, tree
*b
)
2736 tree wider_type
= NULL
;
2738 tree atype
= TREE_TYPE (*a
);
2740 if (CONVERT_EXPR_P (*a
))
2742 suba
= TREE_OPERAND (*a
, 0);
2743 wider_type
= TREE_TYPE (suba
);
2744 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2750 if (CONVERT_EXPR_P (*b
))
2752 subb
= TREE_OPERAND (*b
, 0);
2753 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2764 /* Determines the expression by that USE is expressed from induction variable
2765 CAND at statement AT in LOOP. The expression is stored in a decomposed
2766 form into AFF. Returns false if USE cannot be expressed using CAND. */
2769 get_computation_aff (struct loop
*loop
,
2770 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2771 struct affine_tree_combination
*aff
)
2773 tree ubase
= use
->iv
->base
;
2774 tree ustep
= use
->iv
->step
;
2775 tree cbase
= cand
->iv
->base
;
2776 tree cstep
= cand
->iv
->step
, cstep_common
;
2777 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2778 tree common_type
, var
;
2780 aff_tree cbase_aff
, var_aff
;
2783 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2785 /* We do not have a precision to express the values of use. */
2789 var
= var_at_stmt (loop
, cand
, at
);
2790 uutype
= unsigned_type_for (utype
);
2792 /* If the conversion is not noop, perform it. */
2793 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2795 cstep
= fold_convert (uutype
, cstep
);
2796 cbase
= fold_convert (uutype
, cbase
);
2797 var
= fold_convert (uutype
, var
);
2800 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2803 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2804 type, we achieve better folding by computing their difference in this
2805 wider type, and cast the result to UUTYPE. We do not need to worry about
2806 overflows, as all the arithmetics will in the end be performed in UUTYPE
2808 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2810 /* use = ubase - ratio * cbase + ratio * var. */
2811 tree_to_aff_combination (ubase
, common_type
, aff
);
2812 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2813 tree_to_aff_combination (var
, uutype
, &var_aff
);
2815 /* We need to shift the value if we are after the increment. */
2816 if (stmt_after_increment (loop
, cand
, at
))
2820 if (common_type
!= uutype
)
2821 cstep_common
= fold_convert (common_type
, cstep
);
2823 cstep_common
= cstep
;
2825 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2826 aff_combination_add (&cbase_aff
, &cstep_aff
);
2829 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2830 aff_combination_add (aff
, &cbase_aff
);
2831 if (common_type
!= uutype
)
2832 aff_combination_convert (aff
, uutype
);
2834 aff_combination_scale (&var_aff
, rat
);
2835 aff_combination_add (aff
, &var_aff
);
2840 /* Determines the expression by that USE is expressed from induction variable
2841 CAND at statement AT in LOOP. The computation is unshared. */
2844 get_computation_at (struct loop
*loop
,
2845 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
2848 tree type
= TREE_TYPE (use
->iv
->base
);
2850 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
2852 unshare_aff_combination (&aff
);
2853 return fold_convert (type
, aff_combination_to_tree (&aff
));
2856 /* Determines the expression by that USE is expressed from induction variable
2857 CAND in LOOP. The computation is unshared. */
2860 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2862 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2865 /* Returns cost of addition in MODE. */
2868 add_cost (enum machine_mode mode
, bool speed
)
2870 static unsigned costs
[NUM_MACHINE_MODES
];
2878 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2879 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2880 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
2885 cost
= seq_cost (seq
, speed
);
2891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2892 fprintf (dump_file
, "Addition in %s costs %d\n",
2893 GET_MODE_NAME (mode
), cost
);
2897 /* Entry in a hashtable of already known costs for multiplication. */
2900 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2901 enum machine_mode mode
; /* In mode. */
2902 unsigned cost
; /* The cost. */
2905 /* Counts hash value for the ENTRY. */
2908 mbc_entry_hash (const void *entry
)
2910 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
2912 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2915 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2918 mbc_entry_eq (const void *entry1
, const void *entry2
)
2920 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
2921 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
2923 return (e1
->mode
== e2
->mode
2924 && e1
->cst
== e2
->cst
);
2927 /* Returns cost of multiplication by constant CST in MODE. */
2930 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
2932 static htab_t costs
;
2933 struct mbc_entry
**cached
, act
;
2938 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2942 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
2944 return (*cached
)->cost
;
2946 *cached
= XNEW (struct mbc_entry
);
2947 (*cached
)->mode
= mode
;
2948 (*cached
)->cst
= cst
;
2951 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2952 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
2956 cost
= seq_cost (seq
, speed
);
2958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2959 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
2960 (int) cst
, GET_MODE_NAME (mode
), cost
);
2962 (*cached
)->cost
= cost
;
2967 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2968 validity for a memory reference accessing memory of mode MODE. */
2971 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
)
2973 #define MAX_RATIO 128
2974 static sbitmap valid_mult
[MAX_MACHINE_MODE
];
2976 if (!valid_mult
[mode
])
2978 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
2982 valid_mult
[mode
] = sbitmap_alloc (2 * MAX_RATIO
+ 1);
2983 sbitmap_zero (valid_mult
[mode
]);
2984 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
2985 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2987 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
2988 if (memory_address_p (mode
, addr
))
2989 SET_BIT (valid_mult
[mode
], i
+ MAX_RATIO
);
2992 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2994 fprintf (dump_file
, " allowed multipliers:");
2995 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
2996 if (TEST_BIT (valid_mult
[mode
], i
+ MAX_RATIO
))
2997 fprintf (dump_file
, " %d", (int) i
);
2998 fprintf (dump_file
, "\n");
2999 fprintf (dump_file
, "\n");
3003 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3006 return TEST_BIT (valid_mult
[mode
], ratio
+ MAX_RATIO
);
3009 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3010 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3011 variable is omitted. Compute the cost for a memory reference that accesses
3012 a memory location of mode MEM_MODE.
3014 TODO -- there must be some better way. This all is quite crude. */
3017 get_address_cost (bool symbol_present
, bool var_present
,
3018 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3019 enum machine_mode mem_mode
,
3022 static bool initialized
[MAX_MACHINE_MODE
];
3023 static HOST_WIDE_INT rat
[MAX_MACHINE_MODE
], off
[MAX_MACHINE_MODE
];
3024 static HOST_WIDE_INT min_offset
[MAX_MACHINE_MODE
], max_offset
[MAX_MACHINE_MODE
];
3025 static unsigned costs
[MAX_MACHINE_MODE
][2][2][2][2];
3026 unsigned cost
, acost
, complexity
;
3027 bool offset_p
, ratio_p
;
3028 HOST_WIDE_INT s_offset
;
3029 unsigned HOST_WIDE_INT mask
;
3032 if (!initialized
[mem_mode
])
3035 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
3036 int old_cse_not_expected
;
3037 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3038 rtx seq
, addr
, base
;
3041 initialized
[mem_mode
] = true;
3043 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3045 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3046 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3048 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3049 if (!memory_address_p (mem_mode
, addr
))
3052 max_offset
[mem_mode
] = i
== start
? 0 : i
>> 1;
3053 off
[mem_mode
] = max_offset
[mem_mode
];
3055 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3057 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3058 if (!memory_address_p (mem_mode
, addr
))
3061 min_offset
[mem_mode
] = i
== start
? 0 : -(i
>> 1);
3063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3065 fprintf (dump_file
, "get_address_cost:\n");
3066 fprintf (dump_file
, " min offset %s %d\n",
3067 GET_MODE_NAME (mem_mode
),
3068 (int) min_offset
[mem_mode
]);
3069 fprintf (dump_file
, " max offset %s %d\n",
3070 GET_MODE_NAME (mem_mode
),
3071 (int) max_offset
[mem_mode
]);
3075 for (i
= 2; i
<= MAX_RATIO
; i
++)
3076 if (multiplier_allowed_in_address_p (i
, mem_mode
))
3082 /* Compute the cost of various addressing modes. */
3084 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3085 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3087 for (i
= 0; i
< 16; i
++)
3090 var_p
= (i
>> 1) & 1;
3091 off_p
= (i
>> 2) & 1;
3092 rat_p
= (i
>> 3) & 1;
3096 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
,
3097 gen_int_mode (rat
[mem_mode
], Pmode
));
3100 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3104 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3105 /* ??? We can run into trouble with some backends by presenting
3106 it with symbols which haven't been properly passed through
3107 targetm.encode_section_info. By setting the local bit, we
3108 enhance the probability of things working. */
3109 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3112 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3113 gen_rtx_fmt_ee (PLUS
, Pmode
,
3115 gen_int_mode (off
[mem_mode
],
3119 base
= gen_int_mode (off
[mem_mode
], Pmode
);
3124 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3127 /* To avoid splitting addressing modes, pretend that no cse will
3129 old_cse_not_expected
= cse_not_expected
;
3130 cse_not_expected
= true;
3131 addr
= memory_address (mem_mode
, addr
);
3132 cse_not_expected
= old_cse_not_expected
;
3136 acost
= seq_cost (seq
, speed
);
3137 acost
+= address_cost (addr
, mem_mode
, speed
);
3141 costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
] = acost
;
3144 /* On some targets, it is quite expensive to load symbol to a register,
3145 which makes addresses that contain symbols look much more expensive.
3146 However, the symbol will have to be loaded in any case before the
3147 loop (and quite likely we have it in register already), so it does not
3148 make much sense to penalize them too heavily. So make some final
3149 tweaks for the SYMBOL_PRESENT modes:
3151 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3152 var is cheaper, use this mode with small penalty.
3153 If VAR_PRESENT is true, try whether the mode with
3154 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3155 if this is the case, use it. */
3156 add_c
= add_cost (Pmode
, speed
);
3157 for (i
= 0; i
< 8; i
++)
3160 off_p
= (i
>> 1) & 1;
3161 rat_p
= (i
>> 2) & 1;
3163 acost
= costs
[mem_mode
][0][1][off_p
][rat_p
] + 1;
3167 if (acost
< costs
[mem_mode
][1][var_p
][off_p
][rat_p
])
3168 costs
[mem_mode
][1][var_p
][off_p
][rat_p
] = acost
;
3171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3173 fprintf (dump_file
, "Address costs:\n");
3175 for (i
= 0; i
< 16; i
++)
3178 var_p
= (i
>> 1) & 1;
3179 off_p
= (i
>> 2) & 1;
3180 rat_p
= (i
>> 3) & 1;
3182 fprintf (dump_file
, " ");
3184 fprintf (dump_file
, "sym + ");
3186 fprintf (dump_file
, "var + ");
3188 fprintf (dump_file
, "cst + ");
3190 fprintf (dump_file
, "rat * ");
3192 acost
= costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
];
3193 fprintf (dump_file
, "index costs %d\n", acost
);
3195 fprintf (dump_file
, "\n");
3199 bits
= GET_MODE_BITSIZE (Pmode
);
3200 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3202 if ((offset
>> (bits
- 1) & 1))
3207 offset_p
= (s_offset
!= 0
3208 && min_offset
[mem_mode
] <= s_offset
3209 && s_offset
<= max_offset
[mem_mode
]);
3210 ratio_p
= (ratio
!= 1
3211 && multiplier_allowed_in_address_p (ratio
, mem_mode
));
3213 if (ratio
!= 1 && !ratio_p
)
3214 cost
+= multiply_by_cost (ratio
, Pmode
, speed
);
3216 if (s_offset
&& !offset_p
&& !symbol_present
)
3217 cost
+= add_cost (Pmode
, speed
);
3219 acost
= costs
[mem_mode
][symbol_present
][var_present
][offset_p
][ratio_p
];
3220 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3221 return new_cost (cost
+ acost
, complexity
);
3224 /* Estimates cost of forcing expression EXPR into a variable. */
3227 force_expr_to_var_cost (tree expr
, bool speed
)
3229 static bool costs_initialized
= false;
3230 static unsigned integer_cost
[2];
3231 static unsigned symbol_cost
[2];
3232 static unsigned address_cost
[2];
3234 comp_cost cost0
, cost1
, cost
;
3235 enum machine_mode mode
;
3237 if (!costs_initialized
)
3239 tree type
= build_pointer_type (integer_type_node
);
3244 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3245 TREE_STATIC (var
) = 1;
3246 x
= produce_memory_decl_rtl (var
, NULL
);
3247 SET_DECL_RTL (var
, x
);
3249 addr
= build1 (ADDR_EXPR
, type
, var
);
3252 for (i
= 0; i
< 2; i
++)
3254 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3257 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3260 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3262 build_int_cst (sizetype
, 2000)), i
) + 1;
3263 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3265 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3266 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3267 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3268 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3269 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3270 fprintf (dump_file
, "\n");
3274 costs_initialized
= true;
3279 if (SSA_VAR_P (expr
))
3282 if (is_gimple_min_invariant (expr
))
3284 if (TREE_CODE (expr
) == INTEGER_CST
)
3285 return new_cost (integer_cost
[speed
], 0);
3287 if (TREE_CODE (expr
) == ADDR_EXPR
)
3289 tree obj
= TREE_OPERAND (expr
, 0);
3291 if (TREE_CODE (obj
) == VAR_DECL
3292 || TREE_CODE (obj
) == PARM_DECL
3293 || TREE_CODE (obj
) == RESULT_DECL
)
3294 return new_cost (symbol_cost
[speed
], 0);
3297 return new_cost (address_cost
[speed
], 0);
3300 switch (TREE_CODE (expr
))
3302 case POINTER_PLUS_EXPR
:
3306 op0
= TREE_OPERAND (expr
, 0);
3307 op1
= TREE_OPERAND (expr
, 1);
3311 if (is_gimple_val (op0
))
3314 cost0
= force_expr_to_var_cost (op0
, speed
);
3316 if (is_gimple_val (op1
))
3319 cost1
= force_expr_to_var_cost (op1
, speed
);
3324 op0
= TREE_OPERAND (expr
, 0);
3328 if (is_gimple_val (op0
))
3331 cost0
= force_expr_to_var_cost (op0
, speed
);
3337 /* Just an arbitrary value, FIXME. */
3338 return new_cost (target_spill_cost
[speed
], 0);
3341 mode
= TYPE_MODE (TREE_TYPE (expr
));
3342 switch (TREE_CODE (expr
))
3344 case POINTER_PLUS_EXPR
:
3348 cost
= new_cost (add_cost (mode
, speed
), 0);
3352 if (cst_and_fits_in_hwi (op0
))
3353 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3354 else if (cst_and_fits_in_hwi (op1
))
3355 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3357 return new_cost (target_spill_cost
[speed
], 0);
3364 cost
= add_costs (cost
, cost0
);
3365 cost
= add_costs (cost
, cost1
);
3367 /* Bound the cost by target_spill_cost. The parts of complicated
3368 computations often are either loop invariant or at least can
3369 be shared between several iv uses, so letting this grow without
3370 limits would not give reasonable results. */
3371 if (cost
.cost
> target_spill_cost
[speed
])
3372 cost
.cost
= target_spill_cost
[speed
];
3377 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3378 invariants the computation depends on. */
3381 force_var_cost (struct ivopts_data
*data
,
3382 tree expr
, bitmap
*depends_on
)
3386 fd_ivopts_data
= data
;
3387 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3390 return force_expr_to_var_cost (expr
, data
->speed
);
3393 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3394 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3395 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3396 invariants the computation depends on. */
3399 split_address_cost (struct ivopts_data
*data
,
3400 tree addr
, bool *symbol_present
, bool *var_present
,
3401 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3404 HOST_WIDE_INT bitsize
;
3405 HOST_WIDE_INT bitpos
;
3407 enum machine_mode mode
;
3408 int unsignedp
, volatilep
;
3410 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3411 &unsignedp
, &volatilep
, false);
3414 || bitpos
% BITS_PER_UNIT
!= 0
3415 || TREE_CODE (core
) != VAR_DECL
)
3417 *symbol_present
= false;
3418 *var_present
= true;
3419 fd_ivopts_data
= data
;
3420 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3421 return new_cost (target_spill_cost
[data
->speed
], 0);
3424 *offset
+= bitpos
/ BITS_PER_UNIT
;
3425 if (TREE_STATIC (core
)
3426 || DECL_EXTERNAL (core
))
3428 *symbol_present
= true;
3429 *var_present
= false;
3433 *symbol_present
= false;
3434 *var_present
= true;
3438 /* Estimates cost of expressing difference of addresses E1 - E2 as
3439 var + symbol + offset. The value of offset is added to OFFSET,
3440 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3441 part is missing. DEPENDS_ON is a set of the invariants the computation
3445 ptr_difference_cost (struct ivopts_data
*data
,
3446 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3447 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3449 HOST_WIDE_INT diff
= 0;
3450 aff_tree aff_e1
, aff_e2
;
3453 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3455 if (ptr_difference_const (e1
, e2
, &diff
))
3458 *symbol_present
= false;
3459 *var_present
= false;
3463 if (integer_zerop (e2
))
3464 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3465 symbol_present
, var_present
, offset
, depends_on
);
3467 *symbol_present
= false;
3468 *var_present
= true;
3470 type
= signed_type_for (TREE_TYPE (e1
));
3471 tree_to_aff_combination (e1
, type
, &aff_e1
);
3472 tree_to_aff_combination (e2
, type
, &aff_e2
);
3473 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3474 aff_combination_add (&aff_e1
, &aff_e2
);
3476 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3479 /* Estimates cost of expressing difference E1 - E2 as
3480 var + symbol + offset. The value of offset is added to OFFSET,
3481 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3482 part is missing. DEPENDS_ON is a set of the invariants the computation
3486 difference_cost (struct ivopts_data
*data
,
3487 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3488 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3490 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3491 unsigned HOST_WIDE_INT off1
, off2
;
3492 aff_tree aff_e1
, aff_e2
;
3495 e1
= strip_offset (e1
, &off1
);
3496 e2
= strip_offset (e2
, &off2
);
3497 *offset
+= off1
- off2
;
3502 if (TREE_CODE (e1
) == ADDR_EXPR
)
3503 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3504 offset
, depends_on
);
3505 *symbol_present
= false;
3507 if (operand_equal_p (e1
, e2
, 0))
3509 *var_present
= false;
3513 *var_present
= true;
3515 if (integer_zerop (e2
))
3516 return force_var_cost (data
, e1
, depends_on
);
3518 if (integer_zerop (e1
))
3520 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3521 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
3525 type
= signed_type_for (TREE_TYPE (e1
));
3526 tree_to_aff_combination (e1
, type
, &aff_e1
);
3527 tree_to_aff_combination (e2
, type
, &aff_e2
);
3528 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3529 aff_combination_add (&aff_e1
, &aff_e2
);
3531 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3534 /* Determines the cost of the computation by that USE is expressed
3535 from induction variable CAND. If ADDRESS_P is true, we just need
3536 to create an address from it, otherwise we want to get it into
3537 register. A set of invariants we depend on is stored in
3538 DEPENDS_ON. AT is the statement at that the value is computed. */
3541 get_computation_cost_at (struct ivopts_data
*data
,
3542 struct iv_use
*use
, struct iv_cand
*cand
,
3543 bool address_p
, bitmap
*depends_on
, gimple at
)
3545 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3547 tree utype
= TREE_TYPE (ubase
), ctype
;
3548 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3549 HOST_WIDE_INT ratio
, aratio
;
3550 bool var_present
, symbol_present
;
3553 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3557 /* Only consider real candidates. */
3559 return infinite_cost
;
3561 cbase
= cand
->iv
->base
;
3562 cstep
= cand
->iv
->step
;
3563 ctype
= TREE_TYPE (cbase
);
3565 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3567 /* We do not have a precision to express the values of use. */
3568 return infinite_cost
;
3573 /* Do not try to express address of an object with computation based
3574 on address of a different object. This may cause problems in rtl
3575 level alias analysis (that does not expect this to be happening,
3576 as this is illegal in C), and would be unlikely to be useful
3578 if (use
->iv
->base_object
3579 && cand
->iv
->base_object
3580 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3581 return infinite_cost
;
3584 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3586 /* TODO -- add direct handling of this case. */
3590 /* CSTEPI is removed from the offset in case statement is after the
3591 increment. If the step is not constant, we use zero instead.
3592 This is a bit imprecise (there is the extra addition), but
3593 redundancy elimination is likely to transform the code so that
3594 it uses value of the variable before increment anyway,
3595 so it is not that much unrealistic. */
3596 if (cst_and_fits_in_hwi (cstep
))
3597 cstepi
= int_cst_value (cstep
);
3601 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3602 return infinite_cost
;
3604 if (double_int_fits_in_shwi_p (rat
))
3605 ratio
= double_int_to_shwi (rat
);
3607 return infinite_cost
;
3610 ctype
= TREE_TYPE (cbase
);
3612 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3613 or ratio == 1, it is better to handle this like
3615 ubase - ratio * cbase + ratio * var
3617 (also holds in the case ratio == -1, TODO. */
3619 if (cst_and_fits_in_hwi (cbase
))
3621 offset
= - ratio
* int_cst_value (cbase
);
3622 cost
= difference_cost (data
,
3623 ubase
, build_int_cst (utype
, 0),
3624 &symbol_present
, &var_present
, &offset
,
3627 else if (ratio
== 1)
3629 cost
= difference_cost (data
,
3631 &symbol_present
, &var_present
, &offset
,
3635 && !POINTER_TYPE_P (ctype
)
3636 && multiplier_allowed_in_address_p (ratio
,
3637 TYPE_MODE (TREE_TYPE (utype
))))
3640 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
3641 cost
= difference_cost (data
,
3643 &symbol_present
, &var_present
, &offset
,
3648 cost
= force_var_cost (data
, cbase
, depends_on
);
3649 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
3650 cost
= add_costs (cost
,
3651 difference_cost (data
,
3652 ubase
, build_int_cst (utype
, 0),
3653 &symbol_present
, &var_present
,
3654 &offset
, depends_on
));
3657 /* If we are after the increment, the value of the candidate is higher by
3659 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3660 offset
-= ratio
* cstepi
;
3662 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3663 (symbol/var1/const parts may be omitted). If we are looking for an
3664 address, find the cost of addressing this. */
3666 return add_costs (cost
,
3667 get_address_cost (symbol_present
, var_present
,
3669 TYPE_MODE (TREE_TYPE (utype
)), speed
));
3671 /* Otherwise estimate the costs for computing the expression. */
3672 if (!symbol_present
&& !var_present
&& !offset
)
3675 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
3679 /* Symbol + offset should be compile-time computable so consider that they
3680 are added once to the variable, if present. */
3681 if (var_present
&& (symbol_present
|| offset
))
3682 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
)
3683 / AVG_LOOP_NITER (data
->current_loop
);
3685 /* Having offset does not affect runtime cost in case it is added to
3686 symbol, but it increases complexity. */
3690 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
3692 aratio
= ratio
> 0 ? ratio
: -ratio
;
3694 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
3698 /* Just get the expression, expand it and measure the cost. */
3699 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3702 return infinite_cost
;
3705 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3707 return new_cost (computation_cost (comp
, speed
), 0);
3711 /* Determines the cost of the computation by that USE is expressed
3712 from induction variable CAND. If ADDRESS_P is true, we just need
3713 to create an address from it, otherwise we want to get it into
3714 register. A set of invariants we depend on is stored in
3718 get_computation_cost (struct ivopts_data
*data
,
3719 struct iv_use
*use
, struct iv_cand
*cand
,
3720 bool address_p
, bitmap
*depends_on
)
3722 return get_computation_cost_at (data
,
3723 use
, cand
, address_p
, depends_on
, use
->stmt
);
3726 /* Determines cost of basing replacement of USE on CAND in a generic
3730 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3731 struct iv_use
*use
, struct iv_cand
*cand
)
3736 /* The simple case first -- if we need to express value of the preserved
3737 original biv, the cost is 0. This also prevents us from counting the
3738 cost of increment twice -- once at this use and once in the cost of
3740 if (cand
->pos
== IP_ORIGINAL
3741 && cand
->incremented_at
== use
->stmt
)
3743 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
);
3747 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3748 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3750 return !infinite_cost_p (cost
);
3753 /* Determines cost of basing replacement of USE on CAND in an address. */
3756 determine_use_iv_cost_address (struct ivopts_data
*data
,
3757 struct iv_use
*use
, struct iv_cand
*cand
)
3760 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3762 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3764 return !infinite_cost_p (cost
);
3767 /* Computes value of candidate CAND at position AT in iteration NITER, and
3768 stores it to VAL. */
3771 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
3774 aff_tree step
, delta
, nit
;
3775 struct iv
*iv
= cand
->iv
;
3776 tree type
= TREE_TYPE (iv
->base
);
3777 tree steptype
= type
;
3778 if (POINTER_TYPE_P (type
))
3779 steptype
= sizetype
;
3781 tree_to_aff_combination (iv
->step
, steptype
, &step
);
3782 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
3783 aff_combination_convert (&nit
, steptype
);
3784 aff_combination_mult (&nit
, &step
, &delta
);
3785 if (stmt_after_increment (loop
, cand
, at
))
3786 aff_combination_add (&delta
, &step
);
3788 tree_to_aff_combination (iv
->base
, type
, val
);
3789 aff_combination_add (val
, &delta
);
3792 /* Returns period of induction variable iv. */
3795 iv_period (struct iv
*iv
)
3797 tree step
= iv
->step
, period
, type
;
3800 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3802 /* Period of the iv is gcd (step, type range). Since type range is power
3803 of two, it suffices to determine the maximum power of two that divides
3805 pow2div
= num_ending_zeros (step
);
3806 type
= unsigned_type_for (TREE_TYPE (step
));
3808 period
= build_low_bits_mask (type
,
3809 (TYPE_PRECISION (type
)
3810 - tree_low_cst (pow2div
, 1)));
3815 /* Returns the comparison operator used when eliminating the iv USE. */
3817 static enum tree_code
3818 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3820 struct loop
*loop
= data
->current_loop
;
3824 ex_bb
= gimple_bb (use
->stmt
);
3825 exit
= EDGE_SUCC (ex_bb
, 0);
3826 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3827 exit
= EDGE_SUCC (ex_bb
, 1);
3829 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3832 /* Check whether it is possible to express the condition in USE by comparison
3833 of candidate CAND. If so, store the value compared with to BOUND. */
3836 may_eliminate_iv (struct ivopts_data
*data
,
3837 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3842 struct loop
*loop
= data
->current_loop
;
3845 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3848 /* For now works only for exits that dominate the loop latch.
3849 TODO: extend to other conditions inside loop body. */
3850 ex_bb
= gimple_bb (use
->stmt
);
3851 if (use
->stmt
!= last_stmt (ex_bb
)
3852 || gimple_code (use
->stmt
) != GIMPLE_COND
3853 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3856 exit
= EDGE_SUCC (ex_bb
, 0);
3857 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3858 exit
= EDGE_SUCC (ex_bb
, 1);
3859 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3862 nit
= niter_for_exit (data
, exit
);
3866 /* Determine whether we can use the variable to test the exit condition.
3867 This is the case iff the period of the induction variable is greater
3868 than the number of iterations for which the exit condition is true. */
3869 period
= iv_period (cand
->iv
);
3871 /* If the number of iterations is constant, compare against it directly. */
3872 if (TREE_CODE (nit
) == INTEGER_CST
)
3874 if (!tree_int_cst_lt (nit
, period
))
3878 /* If not, and if this is the only possible exit of the loop, see whether
3879 we can get a conservative estimate on the number of iterations of the
3880 entire loop and compare against that instead. */
3881 else if (loop_only_exit_p (loop
, exit
))
3883 double_int period_value
, max_niter
;
3884 if (!estimated_loop_iterations (loop
, true, &max_niter
))
3886 period_value
= tree_to_double_int (period
);
3887 if (double_int_ucmp (max_niter
, period_value
) >= 0)
3891 /* Otherwise, punt. */
3895 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
3897 *bound
= aff_combination_to_tree (&bnd
);
3898 /* It is unlikely that computing the number of iterations using division
3899 would be more profitable than keeping the original induction variable. */
3900 if (expression_expensive_p (*bound
))
3905 /* Determines cost of basing replacement of USE on CAND in a condition. */
3908 determine_use_iv_cost_condition (struct ivopts_data
*data
,
3909 struct iv_use
*use
, struct iv_cand
*cand
)
3911 tree bound
= NULL_TREE
;
3913 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
3914 comp_cost elim_cost
, express_cost
, cost
;
3917 /* Only consider real candidates. */
3920 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
);
3924 /* Try iv elimination. */
3925 if (may_eliminate_iv (data
, use
, cand
, &bound
))
3927 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
3928 /* The bound is a loop invariant, so it will be only computed
3930 elim_cost
.cost
/= AVG_LOOP_NITER (data
->current_loop
);
3933 elim_cost
= infinite_cost
;
3935 /* Try expressing the original giv. If it is compared with an invariant,
3936 note that we cannot get rid of it. */
3937 ok
= extract_cond_operands (data
, use
->stmt
, NULL
, NULL
, NULL
, &cmp_iv
);
3940 express_cost
= get_computation_cost (data
, use
, cand
, false,
3941 &depends_on_express
);
3942 fd_ivopts_data
= data
;
3943 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
3945 /* Choose the better approach, preferring the eliminated IV. */
3946 if (compare_costs (elim_cost
, express_cost
) <= 0)
3949 depends_on
= depends_on_elim
;
3950 depends_on_elim
= NULL
;
3954 cost
= express_cost
;
3955 depends_on
= depends_on_express
;
3956 depends_on_express
= NULL
;
3960 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
3962 if (depends_on_elim
)
3963 BITMAP_FREE (depends_on_elim
);
3964 if (depends_on_express
)
3965 BITMAP_FREE (depends_on_express
);
3967 return !infinite_cost_p (cost
);
3970 /* Determines cost of basing replacement of USE on CAND. Returns false
3971 if USE cannot be based on CAND. */
3974 determine_use_iv_cost (struct ivopts_data
*data
,
3975 struct iv_use
*use
, struct iv_cand
*cand
)
3979 case USE_NONLINEAR_EXPR
:
3980 return determine_use_iv_cost_generic (data
, use
, cand
);
3983 return determine_use_iv_cost_address (data
, use
, cand
);
3986 return determine_use_iv_cost_condition (data
, use
, cand
);
3993 /* Determines costs of basing the use of the iv on an iv candidate. */
3996 determine_use_iv_costs (struct ivopts_data
*data
)
4000 struct iv_cand
*cand
;
4001 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4003 alloc_use_cost_map (data
);
4005 for (i
= 0; i
< n_iv_uses (data
); i
++)
4007 use
= iv_use (data
, i
);
4009 if (data
->consider_all_candidates
)
4011 for (j
= 0; j
< n_iv_cands (data
); j
++)
4013 cand
= iv_cand (data
, j
);
4014 determine_use_iv_cost (data
, use
, cand
);
4021 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4023 cand
= iv_cand (data
, j
);
4024 if (!determine_use_iv_cost (data
, use
, cand
))
4025 bitmap_set_bit (to_clear
, j
);
4028 /* Remove the candidates for that the cost is infinite from
4029 the list of related candidates. */
4030 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4031 bitmap_clear (to_clear
);
4035 BITMAP_FREE (to_clear
);
4037 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4039 fprintf (dump_file
, "Use-candidate costs:\n");
4041 for (i
= 0; i
< n_iv_uses (data
); i
++)
4043 use
= iv_use (data
, i
);
4045 fprintf (dump_file
, "Use %d:\n", i
);
4046 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4047 for (j
= 0; j
< use
->n_map_members
; j
++)
4049 if (!use
->cost_map
[j
].cand
4050 || infinite_cost_p (use
->cost_map
[j
].cost
))
4053 fprintf (dump_file
, " %d\t%d\t%d\t",
4054 use
->cost_map
[j
].cand
->id
,
4055 use
->cost_map
[j
].cost
.cost
,
4056 use
->cost_map
[j
].cost
.complexity
);
4057 if (use
->cost_map
[j
].depends_on
)
4058 bitmap_print (dump_file
,
4059 use
->cost_map
[j
].depends_on
, "","");
4060 fprintf (dump_file
, "\n");
4063 fprintf (dump_file
, "\n");
4065 fprintf (dump_file
, "\n");
4069 /* Determines cost of the candidate CAND. */
4072 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4074 comp_cost cost_base
;
4075 unsigned cost
, cost_step
;
4084 /* There are two costs associated with the candidate -- its increment
4085 and its initialization. The second is almost negligible for any loop
4086 that rolls enough, so we take it just very little into account. */
4088 base
= cand
->iv
->base
;
4089 cost_base
= force_var_cost (data
, base
, NULL
);
4090 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4092 cost
= cost_step
+ cost_base
.cost
/ AVG_LOOP_NITER (current_loop
);
4094 /* Prefer the original ivs unless we may gain something by replacing it.
4095 The reason is to make debugging simpler; so this is not relevant for
4096 artificial ivs created by other optimization passes. */
4097 if (cand
->pos
!= IP_ORIGINAL
4098 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4101 /* Prefer not to insert statements into latch unless there are some
4102 already (so that we do not create unnecessary jumps). */
4103 if (cand
->pos
== IP_END
4104 && empty_block_p (ip_end_pos (data
->current_loop
)))
4110 /* Determines costs of computation of the candidates. */
4113 determine_iv_costs (struct ivopts_data
*data
)
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4119 fprintf (dump_file
, "Candidate costs:\n");
4120 fprintf (dump_file
, " cand\tcost\n");
4123 for (i
= 0; i
< n_iv_cands (data
); i
++)
4125 struct iv_cand
*cand
= iv_cand (data
, i
);
4127 determine_iv_cost (data
, cand
);
4129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4130 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4134 fprintf (dump_file
, "\n");
4137 /* Calculates cost for having SIZE induction variables. */
4140 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4142 /* We add size to the cost, so that we prefer eliminating ivs
4144 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
);
4147 /* For each size of the induction variable set determine the penalty. */
4150 determine_set_costs (struct ivopts_data
*data
)
4154 gimple_stmt_iterator psi
;
4156 struct loop
*loop
= data
->current_loop
;
4159 /* We use the following model (definitely improvable, especially the
4160 cost function -- TODO):
4162 We estimate the number of registers available (using MD data), name it A.
4164 We estimate the number of registers used by the loop, name it U. This
4165 number is obtained as the number of loop phi nodes (not counting virtual
4166 registers and bivs) + the number of variables from outside of the loop.
4168 We set a reserve R (free regs that are used for temporary computations,
4169 etc.). For now the reserve is a constant 3.
4171 Let I be the number of induction variables.
4173 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4174 make a lot of ivs without a reason).
4175 -- if A - R < U + I <= A, the cost is I * PRES_COST
4176 -- if U + I > A, the cost is I * PRES_COST and
4177 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4179 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4181 fprintf (dump_file
, "Global costs:\n");
4182 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4183 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4184 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4188 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4190 phi
= gsi_stmt (psi
);
4191 op
= PHI_RESULT (phi
);
4193 if (!is_gimple_reg (op
))
4196 if (get_iv (data
, op
))
4202 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4204 struct version_info
*info
= ver_info (data
, j
);
4206 if (info
->inv_id
&& info
->has_nonlin_use
)
4210 data
->regs_used
= n
;
4211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4212 fprintf (dump_file
, " regs_used %d\n", n
);
4214 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4216 fprintf (dump_file
, " cost for size:\n");
4217 fprintf (dump_file
, " ivs\tcost\n");
4218 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4219 fprintf (dump_file
, " %d\t%d\n", j
,
4220 ivopts_global_cost_for_size (data
, j
));
4221 fprintf (dump_file
, "\n");
4225 /* Returns true if A is a cheaper cost pair than B. */
4228 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4238 cmp
= compare_costs (a
->cost
, b
->cost
);
4245 /* In case the costs are the same, prefer the cheaper candidate. */
4246 if (a
->cand
->cost
< b
->cand
->cost
)
4252 /* Computes the cost field of IVS structure. */
4255 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4257 comp_cost cost
= ivs
->cand_use_cost
;
4258 cost
.cost
+= ivs
->cand_cost
;
4259 cost
.cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4264 /* Remove invariants in set INVS to set IVS. */
4267 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4275 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4277 ivs
->n_invariant_uses
[iid
]--;
4278 if (ivs
->n_invariant_uses
[iid
] == 0)
4283 /* Set USE not to be expressed by any candidate in IVS. */
4286 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4289 unsigned uid
= use
->id
, cid
;
4290 struct cost_pair
*cp
;
4292 cp
= ivs
->cand_for_use
[uid
];
4298 ivs
->cand_for_use
[uid
] = NULL
;
4299 ivs
->n_cand_uses
[cid
]--;
4301 if (ivs
->n_cand_uses
[cid
] == 0)
4303 bitmap_clear_bit (ivs
->cands
, cid
);
4304 /* Do not count the pseudocandidates. */
4308 ivs
->cand_cost
-= cp
->cand
->cost
;
4310 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4313 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
4315 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4316 iv_ca_recount_cost (data
, ivs
);
4319 /* Add invariants in set INVS to set IVS. */
4322 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4330 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4332 ivs
->n_invariant_uses
[iid
]++;
4333 if (ivs
->n_invariant_uses
[iid
] == 1)
4338 /* Set cost pair for USE in set IVS to CP. */
4341 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4342 struct iv_use
*use
, struct cost_pair
*cp
)
4344 unsigned uid
= use
->id
, cid
;
4346 if (ivs
->cand_for_use
[uid
] == cp
)
4349 if (ivs
->cand_for_use
[uid
])
4350 iv_ca_set_no_cp (data
, ivs
, use
);
4357 ivs
->cand_for_use
[uid
] = cp
;
4358 ivs
->n_cand_uses
[cid
]++;
4359 if (ivs
->n_cand_uses
[cid
] == 1)
4361 bitmap_set_bit (ivs
->cands
, cid
);
4362 /* Do not count the pseudocandidates. */
4366 ivs
->cand_cost
+= cp
->cand
->cost
;
4368 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4371 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
4372 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4373 iv_ca_recount_cost (data
, ivs
);
4377 /* Extend set IVS by expressing USE by some of the candidates in it
4381 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4384 struct cost_pair
*best_cp
= NULL
, *cp
;
4388 gcc_assert (ivs
->upto
>= use
->id
);
4390 if (ivs
->upto
== use
->id
)
4396 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4398 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4400 if (cheaper_cost_pair (cp
, best_cp
))
4404 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4407 /* Get cost for assignment IVS. */
4410 iv_ca_cost (struct iv_ca
*ivs
)
4412 /* This was a conditional expression but it triggered a bug in
4415 return infinite_cost
;
4420 /* Returns true if all dependences of CP are among invariants in IVS. */
4423 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4428 if (!cp
->depends_on
)
4431 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4433 if (ivs
->n_invariant_uses
[i
] == 0)
4440 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4441 it before NEXT_CHANGE. */
4443 static struct iv_ca_delta
*
4444 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4445 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4447 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4450 change
->old_cp
= old_cp
;
4451 change
->new_cp
= new_cp
;
4452 change
->next_change
= next_change
;
4457 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4460 static struct iv_ca_delta
*
4461 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4463 struct iv_ca_delta
*last
;
4471 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4473 last
->next_change
= l2
;
4478 /* Returns candidate by that USE is expressed in IVS. */
4480 static struct cost_pair
*
4481 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4483 return ivs
->cand_for_use
[use
->id
];
4486 /* Reverse the list of changes DELTA, forming the inverse to it. */
4488 static struct iv_ca_delta
*
4489 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4491 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4492 struct cost_pair
*tmp
;
4494 for (act
= delta
; act
; act
= next
)
4496 next
= act
->next_change
;
4497 act
->next_change
= prev
;
4501 act
->old_cp
= act
->new_cp
;
4508 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4509 reverted instead. */
4512 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4513 struct iv_ca_delta
*delta
, bool forward
)
4515 struct cost_pair
*from
, *to
;
4516 struct iv_ca_delta
*act
;
4519 delta
= iv_ca_delta_reverse (delta
);
4521 for (act
= delta
; act
; act
= act
->next_change
)
4525 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4526 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4530 iv_ca_delta_reverse (delta
);
4533 /* Returns true if CAND is used in IVS. */
4536 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4538 return ivs
->n_cand_uses
[cand
->id
] > 0;
4541 /* Returns number of induction variable candidates in the set IVS. */
4544 iv_ca_n_cands (struct iv_ca
*ivs
)
4546 return ivs
->n_cands
;
4549 /* Free the list of changes DELTA. */
4552 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4554 struct iv_ca_delta
*act
, *next
;
4556 for (act
= *delta
; act
; act
= next
)
4558 next
= act
->next_change
;
4565 /* Allocates new iv candidates assignment. */
4567 static struct iv_ca
*
4568 iv_ca_new (struct ivopts_data
*data
)
4570 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4574 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4575 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4576 nw
->cands
= BITMAP_ALLOC (NULL
);
4579 nw
->cand_use_cost
= zero_cost
;
4581 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4582 nw
->cost
= zero_cost
;
4587 /* Free memory occupied by the set IVS. */
4590 iv_ca_free (struct iv_ca
**ivs
)
4592 free ((*ivs
)->cand_for_use
);
4593 free ((*ivs
)->n_cand_uses
);
4594 BITMAP_FREE ((*ivs
)->cands
);
4595 free ((*ivs
)->n_invariant_uses
);
4600 /* Dumps IVS to FILE. */
4603 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4605 const char *pref
= " invariants ";
4607 comp_cost cost
= iv_ca_cost (ivs
);
4609 fprintf (file
, " cost %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
4610 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4612 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4613 if (ivs
->n_invariant_uses
[i
])
4615 fprintf (file
, "%s%d", pref
, i
);
4618 fprintf (file
, "\n");
4621 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4622 new set, and store differences in DELTA. Number of induction variables
4623 in the new set is stored to N_IVS. */
4626 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4627 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4633 struct cost_pair
*old_cp
, *new_cp
;
4636 for (i
= 0; i
< ivs
->upto
; i
++)
4638 use
= iv_use (data
, i
);
4639 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4642 && old_cp
->cand
== cand
)
4645 new_cp
= get_use_iv_cost (data
, use
, cand
);
4649 if (!iv_ca_has_deps (ivs
, new_cp
))
4652 if (!cheaper_cost_pair (new_cp
, old_cp
))
4655 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4658 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4659 cost
= iv_ca_cost (ivs
);
4661 *n_ivs
= iv_ca_n_cands (ivs
);
4662 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4667 /* Try narrowing set IVS by removing CAND. Return the cost of
4668 the new set and store the differences in DELTA. */
4671 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4672 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4676 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4678 struct iv_cand
*cnd
;
4682 for (i
= 0; i
< n_iv_uses (data
); i
++)
4684 use
= iv_use (data
, i
);
4686 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4687 if (old_cp
->cand
!= cand
)
4692 if (data
->consider_all_candidates
)
4694 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4699 cnd
= iv_cand (data
, ci
);
4701 cp
= get_use_iv_cost (data
, use
, cnd
);
4704 if (!iv_ca_has_deps (ivs
, cp
))
4707 if (!cheaper_cost_pair (cp
, new_cp
))
4715 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4720 cnd
= iv_cand (data
, ci
);
4722 cp
= get_use_iv_cost (data
, use
, cnd
);
4725 if (!iv_ca_has_deps (ivs
, cp
))
4728 if (!cheaper_cost_pair (cp
, new_cp
))
4737 iv_ca_delta_free (delta
);
4738 return infinite_cost
;
4741 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4744 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4745 cost
= iv_ca_cost (ivs
);
4746 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4751 /* Try optimizing the set of candidates IVS by removing candidates different
4752 from to EXCEPT_CAND from it. Return cost of the new set, and store
4753 differences in DELTA. */
4756 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4757 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4760 struct iv_ca_delta
*act_delta
, *best_delta
;
4762 comp_cost best_cost
, acost
;
4763 struct iv_cand
*cand
;
4766 best_cost
= iv_ca_cost (ivs
);
4768 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4770 cand
= iv_cand (data
, i
);
4772 if (cand
== except_cand
)
4775 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4777 if (compare_costs (acost
, best_cost
) < 0)
4780 iv_ca_delta_free (&best_delta
);
4781 best_delta
= act_delta
;
4784 iv_ca_delta_free (&act_delta
);
4793 /* Recurse to possibly remove other unnecessary ivs. */
4794 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4795 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4796 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4797 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4801 /* Tries to extend the sets IVS in the best possible way in order
4802 to express the USE. */
4805 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4808 comp_cost best_cost
, act_cost
;
4811 struct iv_cand
*cand
;
4812 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4813 struct cost_pair
*cp
;
4815 iv_ca_add_use (data
, ivs
, use
);
4816 best_cost
= iv_ca_cost (ivs
);
4818 cp
= iv_ca_cand_for_use (ivs
, use
);
4821 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4822 iv_ca_set_no_cp (data
, ivs
, use
);
4825 /* First try important candidates not based on any memory object. Only if
4826 this fails, try the specific ones. Rationale -- in loops with many
4827 variables the best choice often is to use just one generic biv. If we
4828 added here many ivs specific to the uses, the optimization algorithm later
4829 would be likely to get stuck in a local minimum, thus causing us to create
4830 too many ivs. The approach from few ivs to more seems more likely to be
4831 successful -- starting from few ivs, replacing an expensive use by a
4832 specific iv should always be a win. */
4833 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4835 cand
= iv_cand (data
, i
);
4837 if (cand
->iv
->base_object
!= NULL_TREE
)
4840 if (iv_ca_cand_used_p (ivs
, cand
))
4843 cp
= get_use_iv_cost (data
, use
, cand
);
4847 iv_ca_set_cp (data
, ivs
, use
, cp
);
4848 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4849 iv_ca_set_no_cp (data
, ivs
, use
);
4850 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4852 if (compare_costs (act_cost
, best_cost
) < 0)
4854 best_cost
= act_cost
;
4856 iv_ca_delta_free (&best_delta
);
4857 best_delta
= act_delta
;
4860 iv_ca_delta_free (&act_delta
);
4863 if (infinite_cost_p (best_cost
))
4865 for (i
= 0; i
< use
->n_map_members
; i
++)
4867 cp
= use
->cost_map
+ i
;
4872 /* Already tried this. */
4873 if (cand
->important
&& cand
->iv
->base_object
== NULL_TREE
)
4876 if (iv_ca_cand_used_p (ivs
, cand
))
4880 iv_ca_set_cp (data
, ivs
, use
, cp
);
4881 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4882 iv_ca_set_no_cp (data
, ivs
, use
);
4883 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4886 if (compare_costs (act_cost
, best_cost
) < 0)
4888 best_cost
= act_cost
;
4891 iv_ca_delta_free (&best_delta
);
4892 best_delta
= act_delta
;
4895 iv_ca_delta_free (&act_delta
);
4899 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4900 iv_ca_delta_free (&best_delta
);
4902 return !infinite_cost_p (best_cost
);
4905 /* Finds an initial assignment of candidates to uses. */
4907 static struct iv_ca
*
4908 get_initial_solution (struct ivopts_data
*data
)
4910 struct iv_ca
*ivs
= iv_ca_new (data
);
4913 for (i
= 0; i
< n_iv_uses (data
); i
++)
4914 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4923 /* Tries to improve set of induction variables IVS. */
4926 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4929 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
4930 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4931 struct iv_cand
*cand
;
4933 /* Try extending the set of induction variables by one. */
4934 for (i
= 0; i
< n_iv_cands (data
); i
++)
4936 cand
= iv_cand (data
, i
);
4938 if (iv_ca_cand_used_p (ivs
, cand
))
4941 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4945 /* If we successfully added the candidate and the set is small enough,
4946 try optimizing it by removing other candidates. */
4947 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
4949 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
4950 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
4951 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
4952 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
4955 if (compare_costs (acost
, best_cost
) < 0)
4958 iv_ca_delta_free (&best_delta
);
4959 best_delta
= act_delta
;
4962 iv_ca_delta_free (&act_delta
);
4967 /* Try removing the candidates from the set instead. */
4968 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
4970 /* Nothing more we can do. */
4975 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4976 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
4977 iv_ca_delta_free (&best_delta
);
4981 /* Attempts to find the optimal set of induction variables. We do simple
4982 greedy heuristic -- we try to replace at most one candidate in the selected
4983 solution and remove the unused ivs while this improves the cost. */
4985 static struct iv_ca
*
4986 find_optimal_iv_set (struct ivopts_data
*data
)
4992 /* Get the initial solution. */
4993 set
= get_initial_solution (data
);
4996 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4997 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5001 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5003 fprintf (dump_file
, "Initial set of candidates:\n");
5004 iv_ca_dump (data
, dump_file
, set
);
5007 while (try_improve_iv_set (data
, set
))
5009 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5011 fprintf (dump_file
, "Improved to:\n");
5012 iv_ca_dump (data
, dump_file
, set
);
5016 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5018 comp_cost cost
= iv_ca_cost (set
);
5019 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n", cost
.cost
, cost
.complexity
);
5022 for (i
= 0; i
< n_iv_uses (data
); i
++)
5024 use
= iv_use (data
, i
);
5025 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5031 /* Creates a new induction variable corresponding to CAND. */
5034 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5036 gimple_stmt_iterator incr_pos
;
5046 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5050 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5055 /* Mark that the iv is preserved. */
5056 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5057 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5059 /* Rewrite the increment so that it uses var_before directly. */
5060 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5065 gimple_add_tmp_var (cand
->var_before
);
5066 add_referenced_var (cand
->var_before
);
5068 base
= unshare_expr (cand
->iv
->base
);
5070 create_iv (base
, unshare_expr (cand
->iv
->step
),
5071 cand
->var_before
, data
->current_loop
,
5072 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5075 /* Creates new induction variables described in SET. */
5078 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5081 struct iv_cand
*cand
;
5084 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5086 cand
= iv_cand (data
, i
);
5087 create_new_iv (data
, cand
);
5091 /* Returns the phi-node in BB with result RESULT. */
5094 get_phi_with_result (basic_block bb
, tree result
)
5096 gimple_stmt_iterator i
= gsi_start_phis (bb
);
5098 for (; !gsi_end_p (i
); gsi_next (&i
))
5099 if (gimple_phi_result (gsi_stmt (i
)) == result
)
5100 return gsi_stmt (i
);
5107 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5108 is true, remove also the ssa name defined by the statement. */
5111 remove_statement (gimple stmt
, bool including_defined_name
)
5113 if (gimple_code (stmt
) == GIMPLE_PHI
)
5115 gimple bb_phi
= get_phi_with_result (gimple_bb (stmt
),
5116 gimple_phi_result (stmt
));
5117 gimple_stmt_iterator bsi
= gsi_for_stmt (bb_phi
);
5118 remove_phi_node (&bsi
, including_defined_name
);
5122 gimple_stmt_iterator bsi
= gsi_for_stmt (stmt
);
5123 gsi_remove (&bsi
, true);
5124 release_defs (stmt
);
5128 /* Rewrites USE (definition of iv used in a nonlinear expression)
5129 using candidate CAND. */
5132 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5133 struct iv_use
*use
, struct iv_cand
*cand
)
5138 gimple_stmt_iterator bsi
;
5140 /* An important special case -- if we are asked to express value of
5141 the original iv by itself, just exit; there is no need to
5142 introduce a new computation (that might also need casting the
5143 variable to unsigned and back). */
5144 if (cand
->pos
== IP_ORIGINAL
5145 && cand
->incremented_at
== use
->stmt
)
5147 tree step
, ctype
, utype
;
5148 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5150 gcc_assert (is_gimple_assign (use
->stmt
));
5151 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5153 step
= cand
->iv
->step
;
5154 ctype
= TREE_TYPE (step
);
5155 utype
= TREE_TYPE (cand
->var_after
);
5156 if (TREE_CODE (step
) == NEGATE_EXPR
)
5158 incr_code
= MINUS_EXPR
;
5159 step
= TREE_OPERAND (step
, 0);
5162 /* Check whether we may leave the computation unchanged.
5163 This is the case only if it does not rely on other
5164 computations in the loop -- otherwise, the computation
5165 we rely upon may be removed in remove_unused_ivs,
5166 thus leading to ICE. */
5167 old_code
= gimple_assign_rhs_code (use
->stmt
);
5168 if (old_code
== PLUS_EXPR
5169 || old_code
== MINUS_EXPR
5170 || old_code
== POINTER_PLUS_EXPR
)
5172 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5173 op
= gimple_assign_rhs2 (use
->stmt
);
5174 else if (old_code
!= MINUS_EXPR
5175 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5176 op
= gimple_assign_rhs1 (use
->stmt
);
5184 && (TREE_CODE (op
) == INTEGER_CST
5185 || operand_equal_p (op
, step
, 0)))
5188 /* Otherwise, add the necessary computations to express
5190 op
= fold_convert (ctype
, cand
->var_before
);
5191 comp
= fold_convert (utype
,
5192 build2 (incr_code
, ctype
, op
,
5193 unshare_expr (step
)));
5197 comp
= get_computation (data
->current_loop
, use
, cand
);
5198 gcc_assert (comp
!= NULL_TREE
);
5201 switch (gimple_code (use
->stmt
))
5204 tgt
= PHI_RESULT (use
->stmt
);
5206 /* If we should keep the biv, do not replace it. */
5207 if (name_info (data
, tgt
)->preserve_biv
)
5210 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5214 tgt
= gimple_assign_lhs (use
->stmt
);
5215 bsi
= gsi_for_stmt (use
->stmt
);
5222 op
= force_gimple_operand_gsi (&bsi
, comp
, false, SSA_NAME_VAR (tgt
),
5223 true, GSI_SAME_STMT
);
5225 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
5227 ass
= gimple_build_assign (tgt
, op
);
5228 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
5229 remove_statement (use
->stmt
, false);
5233 gimple_assign_set_rhs_from_tree (&bsi
, op
);
5234 use
->stmt
= gsi_stmt (bsi
);
5238 /* Replaces ssa name in index IDX by its basic variable. Callback for
5242 idx_remove_ssa_names (tree base
, tree
*idx
,
5243 void *data ATTRIBUTE_UNUSED
)
5247 if (TREE_CODE (*idx
) == SSA_NAME
)
5248 *idx
= SSA_NAME_VAR (*idx
);
5250 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
5252 op
= &TREE_OPERAND (base
, 2);
5254 && TREE_CODE (*op
) == SSA_NAME
)
5255 *op
= SSA_NAME_VAR (*op
);
5256 op
= &TREE_OPERAND (base
, 3);
5258 && TREE_CODE (*op
) == SSA_NAME
)
5259 *op
= SSA_NAME_VAR (*op
);
5265 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5268 unshare_and_remove_ssa_names (tree ref
)
5270 ref
= unshare_expr (ref
);
5271 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5276 /* Copies the reference information from OLD_REF to NEW_REF. */
5279 copy_ref_info (tree new_ref
, tree old_ref
)
5281 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5282 copy_mem_ref_info (new_ref
, old_ref
);
5284 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5287 /* Rewrites USE (address that is an iv) using candidate CAND. */
5290 rewrite_use_address (struct ivopts_data
*data
,
5291 struct iv_use
*use
, struct iv_cand
*cand
)
5294 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5298 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5300 unshare_aff_combination (&aff
);
5302 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
, data
->speed
);
5303 copy_ref_info (ref
, *use
->op_p
);
5307 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5311 rewrite_use_compare (struct ivopts_data
*data
,
5312 struct iv_use
*use
, struct iv_cand
*cand
)
5314 tree comp
, *var_p
, op
, bound
;
5315 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5316 enum tree_code compare
;
5317 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5323 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5324 tree var_type
= TREE_TYPE (var
);
5327 compare
= iv_elimination_compare (data
, use
);
5328 bound
= unshare_expr (fold_convert (var_type
, bound
));
5329 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
5331 gsi_insert_seq_on_edge_immediate (
5332 loop_preheader_edge (data
->current_loop
),
5335 gimple_cond_set_lhs (use
->stmt
, var
);
5336 gimple_cond_set_code (use
->stmt
, compare
);
5337 gimple_cond_set_rhs (use
->stmt
, op
);
5341 /* The induction variable elimination failed; just express the original
5343 comp
= get_computation (data
->current_loop
, use
, cand
);
5344 gcc_assert (comp
!= NULL_TREE
);
5346 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
5349 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
5350 true, GSI_SAME_STMT
);
5353 /* Rewrites USE using candidate CAND. */
5356 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
5360 case USE_NONLINEAR_EXPR
:
5361 rewrite_use_nonlinear_expr (data
, use
, cand
);
5365 rewrite_use_address (data
, use
, cand
);
5369 rewrite_use_compare (data
, use
, cand
);
5376 update_stmt (use
->stmt
);
5379 /* Rewrite the uses using the selected induction variables. */
5382 rewrite_uses (struct ivopts_data
*data
)
5385 struct iv_cand
*cand
;
5388 for (i
= 0; i
< n_iv_uses (data
); i
++)
5390 use
= iv_use (data
, i
);
5391 cand
= use
->selected
;
5394 rewrite_use (data
, use
, cand
);
5398 /* Removes the ivs that are not used after rewriting. */
5401 remove_unused_ivs (struct ivopts_data
*data
)
5406 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5408 struct version_info
*info
;
5410 info
= ver_info (data
, j
);
5412 && !integer_zerop (info
->iv
->step
)
5414 && !info
->iv
->have_use_for
5415 && !info
->preserve_biv
)
5416 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5420 /* Frees data allocated by the optimization of a single loop. */
5423 free_loop_data (struct ivopts_data
*data
)
5431 pointer_map_destroy (data
->niters
);
5432 data
->niters
= NULL
;
5435 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5437 struct version_info
*info
;
5439 info
= ver_info (data
, i
);
5443 info
->has_nonlin_use
= false;
5444 info
->preserve_biv
= false;
5447 bitmap_clear (data
->relevant
);
5448 bitmap_clear (data
->important_candidates
);
5450 for (i
= 0; i
< n_iv_uses (data
); i
++)
5452 struct iv_use
*use
= iv_use (data
, i
);
5455 BITMAP_FREE (use
->related_cands
);
5456 for (j
= 0; j
< use
->n_map_members
; j
++)
5457 if (use
->cost_map
[j
].depends_on
)
5458 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5459 free (use
->cost_map
);
5462 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5464 for (i
= 0; i
< n_iv_cands (data
); i
++)
5466 struct iv_cand
*cand
= iv_cand (data
, i
);
5470 if (cand
->depends_on
)
5471 BITMAP_FREE (cand
->depends_on
);
5474 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5476 if (data
->version_info_size
< num_ssa_names
)
5478 data
->version_info_size
= 2 * num_ssa_names
;
5479 free (data
->version_info
);
5480 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5483 data
->max_inv_id
= 0;
5485 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5486 SET_DECL_RTL (obj
, NULL_RTX
);
5488 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5491 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5495 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5497 free_loop_data (data
);
5498 free (data
->version_info
);
5499 BITMAP_FREE (data
->relevant
);
5500 BITMAP_FREE (data
->important_candidates
);
5502 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5503 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5504 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5507 /* Optimizes the LOOP. Returns true if anything changed. */
5510 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5512 bool changed
= false;
5513 struct iv_ca
*iv_ca
;
5516 gcc_assert (!data
->niters
);
5517 data
->current_loop
= loop
;
5518 data
->speed
= optimize_loop_for_speed_p (loop
);
5520 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5522 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5524 exit
= single_dom_exit (loop
);
5527 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5528 exit
->src
->index
, exit
->dest
->index
);
5529 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
5530 fprintf (dump_file
, "\n");
5533 fprintf (dump_file
, "\n");
5536 /* For each ssa name determines whether it behaves as an induction variable
5538 if (!find_induction_variables (data
))
5541 /* Finds interesting uses (item 1). */
5542 find_interesting_uses (data
);
5543 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5546 /* Finds candidates for the induction variables (item 2). */
5547 find_iv_candidates (data
);
5549 /* Calculates the costs (item 3, part 1). */
5550 determine_use_iv_costs (data
);
5551 determine_iv_costs (data
);
5552 determine_set_costs (data
);
5554 /* Find the optimal set of induction variables (item 3, part 2). */
5555 iv_ca
= find_optimal_iv_set (data
);
5560 /* Create the new induction variables (item 4, part 1). */
5561 create_new_ivs (data
, iv_ca
);
5562 iv_ca_free (&iv_ca
);
5564 /* Rewrite the uses (item 4, part 2). */
5565 rewrite_uses (data
);
5567 /* Remove the ivs that are unused after rewriting. */
5568 remove_unused_ivs (data
);
5570 /* We have changed the structure of induction variables; it might happen
5571 that definitions in the scev database refer to some of them that were
5576 free_loop_data (data
);
5581 /* Main entry point. Optimizes induction variables in loops. */
5584 tree_ssa_iv_optimize (void)
5587 struct ivopts_data data
;
5590 tree_ssa_iv_optimize_init (&data
);
5592 /* Optimize the loops starting with the innermost ones. */
5593 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
5595 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5596 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5598 tree_ssa_iv_optimize_loop (&data
, loop
);
5601 tree_ssa_iv_optimize_finalize (&data
);