/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
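/* An illustrative example (added commentary; the loops below are sketches,
   not code from this pass).  Strength reduction turns

       for (i = 0; i < n; i++)
         a[i] = 0;

   where the address a + 4 * i is recomputed on every iteration, into

       for (p = &a[0]; p < &a[n]; p++)
         *p = 0;

   where a single pointer induction variable replaces the multiplication,
   and the original counter i can then be eliminated if it has no other
   uses.  */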
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);
  gimple_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
    {
      if (gsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (gsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
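
/* Illustration (added note): for EXPR == (char *) &a[0] the conversion is
   peeled first, the ADDR_EXPR case then takes get_base_address of a[0],
   which is the declaration of a, and &a is returned.  Hence every iv
   stepping through the same array is given the same base object.  */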
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
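
/* Illustration (added note): in the canonical counted loop the header
   contains

       i_1 = PHI <0 (preheader), i_2 (latch)>
       ...
       i_2 = i_1 + 1;

   and determine_biv_step returns the constant step 1 for that phi.  A phi
   result that is not a simple affine function of itself, or one whose
   step is zero, yields NULL_TREE instead.  */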
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
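/* Worked example (added note): for TOP = i_1 * 12 and BOT = i_1, the
   MULT_EXPR case recurses on (i_1, i_1), which succeeds through
   operand_equal_p with 1, so *MUL becomes 1 * 12 = 12.  For the plain
   constants TOP = 12, BOT = 4 the INTEGER_CST case divides exactly and
   stores 3; TOP = 13 would fail because the remainder is nonzero.  */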
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
                               GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0)
        return true;

      if (!constant_multiple_of (step, al, &mul))
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */
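/* Example (added note): for EXPR = &a[i + 3] with 4-byte array elements,
   the recursion strips the constant 3 from the index, multiplies it by the
   element size in the ARRAY_REF case, returns the simplified &a[i] and
   stores 12 to *OFFSET.  */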
static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
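
/* E.g. (added note) both int and int * map to unsigned int on a target
   where the two have the same precision; doing candidate arithmetic in an
   unsigned (wrap-around) type avoids introducing new signed overflow.  */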
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
         type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that taking the modulus by it
             is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
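
/* Sizing note (added): rounding up means e.g. 5 related candidates get a
   cost map of 8 entries, so a candidate id is folded into the map with the
   cheap mask "id & (8 - 1)" rather than a division; set_use_iv_cost and
   get_use_iv_cost below depend on n_map_members being a power of two.  */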
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
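
/* In other words (added note), costs are ordered lexicographically: the
   runtime cost decides first and complexity only breaks ties, so
   {4, 2} < {5, 0} but {4, 2} > {4, 1} under compare_costs.  */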
2462 /* Returns true if COST is infinite. */
2465 infinite_cost_p (comp_cost cost
)
2467 return cost
.cost
== INFTY
;
2470 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2471 on invariants DEPENDS_ON and that the value used in expressing it
2475 set_use_iv_cost (struct ivopts_data
*data
,
2476 struct iv_use
*use
, struct iv_cand
*cand
,
2477 comp_cost cost
, bitmap depends_on
, tree value
)
2481 if (infinite_cost_p (cost
))
2483 BITMAP_FREE (depends_on
);
2487 if (data
->consider_all_candidates
)
2489 use
->cost_map
[cand
->id
].cand
= cand
;
2490 use
->cost_map
[cand
->id
].cost
= cost
;
2491 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2492 use
->cost_map
[cand
->id
].value
= value
;
2496 /* n_map_members is a power of two, so this computes modulo. */
2497 s
= cand
->id
& (use
->n_map_members
- 1);
2498 for (i
= s
; i
< use
->n_map_members
; i
++)
2499 if (!use
->cost_map
[i
].cand
)
2501 for (i
= 0; i
< s
; i
++)
2502 if (!use
->cost_map
[i
].cand
)
2508 use
->cost_map
[i
].cand
= cand
;
2509 use
->cost_map
[i
].cost
= cost
;
2510 use
->cost_map
[i
].depends_on
= depends_on
;
2511 use
->cost_map
[i
].value
= value
;
2514 /* Gets cost of (USE, CANDIDATE) pair. */
2516 static struct cost_pair
*
2517 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2518 struct iv_cand
*cand
)
2521 struct cost_pair
*ret
;
2526 if (data
->consider_all_candidates
)
2528 ret
= use
->cost_map
+ cand
->id
;
2535 /* n_map_members is a power of two, so this computes modulo. */
2536 s
= cand
->id
& (use
->n_map_members
- 1);
2537 for (i
= s
; i
< use
->n_map_members
; i
++)
2538 if (use
->cost_map
[i
].cand
== cand
)
2539 return use
->cost_map
+ i
;
2541 for (i
= 0; i
< s
; i
++)
2542 if (use
->cost_map
[i
].cand
== cand
)
2543 return use
->cost_map
+ i
;
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (Pmode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  enum function_frequency real_frequency = cfun->function_frequency;

  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  cfun->function_frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
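/* An illustrative sketch, kept out of the build: for a type of precision P
   stored in up to two host words, the sign bit is bit P - 1, counted from
   the low word into the high word.  With 64-bit host words, a 16-bit value
   keeps its sign bit in the low word at position 15, while a 96-bit value
   keeps it in the high word at position 96 - 1 - 64 == 31.  A plain-C
   analogue over two 64-bit words:  */
#if 0
static int
example_sign_bit (unsigned long long low, unsigned long long high,
                  unsigned precision)
{
  unsigned bitno = precision - 1;
  unsigned long long w;

  if (bitno < 64)
    w = low;            /* sign bit lives in the low word */
  else
    {
      w = high;         /* ... or in the high word */
      bitno -= 64;
    }

  return (w >> bitno) & 1;
}
#endif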
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
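/* A worked example, kept out of the build, of the rewrite assembled above.
   If in iteration i the use has value ubase + i * ustep and the candidate
   variable has value cbase + i * cstep, with ustep a constant multiple of
   cstep (ratio = ustep / cstep), then

       use = ubase + i * ustep
           = ubase + ratio * (i * cstep)
           = ubase - ratio * cbase + ratio * (cbase + i * cstep)
           = ubase - ratio * cbase + ratio * var,

   which is exactly the affine combination built from AFF, CBASE_AFF and
   VAR_AFF.  Numerically, with example values:  */
#if 0
static long
example_use_from_cand (long i)
{
  long ubase = 100, ustep = 8;          /* use  = 100 + 8*i */
  long cbase = 3, cstep = 4;            /* cand = 3 + 4*i */
  long ratio = ustep / cstep;           /* 2 */
  long var = cbase + i * cstep;         /* the candidate's value */

  /* ubase - ratio * cbase + ratio * var == 100 + 8*i for every i.  */
  return ubase - ratio * cbase + ratio * var;
}
#endif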
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
{
#define MAX_RATIO 128
  static sbitmap valid_mult[MAX_MACHINE_MODE];

  if (!valid_mult[mode])
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult[mode]);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (memory_address_p (mode, addr))
            SET_BIT (valid_mult[mode], i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  enum machine_mode mem_mode,
                  bool speed)
{
  static bool initialized[MAX_MACHINE_MODE];
  static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
  static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
  static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized[mem_mode])
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      initialized[mem_mode] = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (!memory_address_p (mem_mode, addr))
            break;
        }
      max_offset[mem_mode] = i == start ? 0 : i >> 1;
      off[mem_mode] = max_offset[mem_mode];

      for (i = start; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
          if (!memory_address_p (mem_mode, addr))
            break;
        }
      min_offset[mem_mode] = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) min_offset[mem_mode]);
          fprintf (dump_file, "  max offset %s %d\n",
                   GET_MODE_NAME (mem_mode),
                   (int) max_offset[mem_mode]);
        }

      rat[mem_mode] = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode))
          {
            rat[mem_mode] = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);

      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
                                   gen_int_mode (rat[mem_mode], Pmode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, Pmode,
                                      gen_rtx_fmt_ee (PLUS, Pmode,
                                                      base,
                                                      gen_int_mode (off[mem_mode],
                                                                    Pmode)));
            }
          else if (off_p)
            base = gen_int_mode (off[mem_mode], Pmode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address (mem_mode, addr);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, speed);

          if (!acost)
            acost = 1;
          costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (Pmode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
            costs[mem_mode][1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          fprintf (dump_file, "\n");
        }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
              && min_offset[mem_mode] <= s_offset
              && s_offset <= max_offset[mem_mode]);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (Pmode, speed);

  acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
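/* An illustrative sketch, kept out of the build: the mask arithmetic above
   reduces OFFSET modulo the width of Pmode and then sign-extends it, so
   that e.g. on a 32-bit target the unsigned offset 0xfffffff0 is treated
   as the small negative offset -16 when checked against the
   [min_offset, max_offset] range.  In plain C for a 32-bit width:  */
#if 0
static long long
example_sign_extend_32 (unsigned long long offset)
{
  unsigned bits = 32;
  unsigned long long mask = ~(~0ULL << (bits - 1) << 1);   /* 0xffffffff */

  offset &= mask;                       /* reduce modulo 2^32 */
  if ((offset >> (bits - 1)) & 1)       /* sign bit set? */
    offset |= ~mask;                    /* extend with ones */
  return (long long) offset;            /* e.g. 0xfffffff0 -> -16 */
}
#endif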
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  comp_cost cost;
  bool speed = optimize_loop_for_speed_p (data->current_loop);

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (Pmode, speed);

  return cost;
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  comp_cost cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
                                depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }
  *var_present = true;
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost = add_costs (cost, force_var_cost (data, e2, depends_on));
  cost.cost += add_cost (mode, data->speed);

  return cost;
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  comp_cost cost;
  unsigned n_sums;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost, get_address_cost (symbol_present, var_present,
                                              offset, ratio,
                                              TYPE_MODE (TREE_TYPE (*use->op_p)), speed));

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);

      return cost;
    }

  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
  return cost;

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
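/* A worked example, kept out of the build: cand_value_at computes

       val = base + (niter + after) * step,

   where after is 1 when AT lies after the increment (the iv has already
   been stepped in the iteration being examined) and 0 otherwise:  */
#if 0
static long
example_cand_value_at (long base, long step, long niter, int after_increment)
{
  /* E.g. base 0, step 4, niter 10: 40 before the increment, 44 after.  */
  return base + (niter + (after_increment ? 1 : 0)) * step;
}
#endif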
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is the range of its type divided by
     gcd (step, type range).  Since the type range is a power of two,
     it suffices to determine the maximum power of two that divides
     the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
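/* A worked example, kept out of the build: for an unsigned type of
   precision P and step S, the values of the iv start repeating after
   2^P / gcd (S, 2^P) = 2^(P - ctz (S)) iterations, where ctz counts
   trailing zero bits; the mask built above is that power of two minus one.
   E.g. P = 8, S = 12 = 0b1100: ctz = 2, so the period mask is 2^6 - 1 = 63.
   In plain C:  */
#if 0
static unsigned long long
example_period_mask (unsigned long long step, unsigned precision)
{
  unsigned zeros = 0;

  while (zeros < precision && !((step >> zeros) & 1))
    zeros++;                            /* ctz (step), capped at precision */

  return (1ULL << (precision - zeros)) - 1;
}
#endif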
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_lt (nit, period))
        return false;
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else if (loop_only_exit_p (loop, exit))
    {
      double_int period_value, max_niter;
      if (!estimated_loop_iterations (loop, true, &max_niter))
        return false;
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) >= 0)
        return false;
    }

  /* Otherwise, punt.  */
  else
    return false;

  cand_value_at (loop, cand, use->stmt, nit, &bnd);
  *bound = aff_combination_to_tree (&bnd);
  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
  gcc_assert (ok);

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we account for it only a little.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
}
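/* A worked example, kept out of the build: the candidate cost amortizes
   the one-time initialization over the expected iteration count.  With
   AVG_LOOP_NITER == 5, a candidate whose base costs 10 to set up and whose
   increment costs 1 per iteration gets cost 1 + 10/5 = 3, so a cheap
   increment with an expensive base can still win in a loop that rolls
   enough:  */
#if 0
static unsigned
example_cand_cost (unsigned cost_base, unsigned cost_step,
                   unsigned avg_loop_niter)
{
  return cost_step + cost_base / avg_loop_niter;
}
#endif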
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do
        not encourage creating a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
4199 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4201 comp_cost cost
= ivs
->cand_use_cost
;
4202 cost
.cost
+= ivs
->cand_cost
;
4203 cost
.cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? infinite_cost : ivs->cost);
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
4846 static struct iv_ca
*
4847 get_initial_solution (struct ivopts_data
*data
)
4849 struct iv_ca
*ivs
= iv_ca_new (data
);
4852 for (i
= 0; i
< n_iv_uses (data
); i
++)
4853 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
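/* An illustrative sketch, kept out of the build: the function above is a
   greedy local search -- tentatively apply a change, measure the cost,
   revert, and keep the cheapest improvement.  A self-contained toy
   analogue over a 4-candidate subset, simplified to first-improvement
   moves (every name and constant here is made up for the example):  */
#if 0
#include <stdio.h>

static unsigned
toy_cost (unsigned set_bits)
{
  /* Pretend each chosen candidate costs 2, and missing candidate 0
     or candidate 2 forces an expensive fallback costing 7 each.  */
  unsigned cost = 0, i;
  for (i = 0; i < 4; i++)
    if (set_bits & (1u << i))
      cost += 2;
  if (!(set_bits & 1)) cost += 7;
  if (!(set_bits & 4)) cost += 7;
  return cost;
}

int
main (void)
{
  unsigned set = 0xf, best = toy_cost (set), i;
  int improved = 1;

  while (improved)              /* same loop shape as the pass */
    {
      improved = 0;
      for (i = 0; i < 4; i++)
        {
          unsigned trial = set ^ (1u << i);     /* tentative change */
          if (toy_cost (trial) < best)
            {
              set = trial;                      /* commit the winner */
              best = toy_cost (trial);
              improved = 1;
            }
        }
    }
  printf ("final set %#x cost %u\n", set, best);   /* 0x5, cost 4 */
  return 0;
}
#endif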
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
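
/* For illustration, the search above is plain greedy local search -- start
   from a feasible solution and apply the best single-step change until no
   step improves the cost.  The same shape in self-contained C, over an
   abstract cost function (hypothetical names; improve_once stands for
   try_improve_iv_set, greedy for find_optimal_iv_set):

       int improve_once (int *solution);  // apply the best single change;
                                          // nonzero iff the cost dropped

       void greedy (int *solution)
       {
         while (improve_once (solution))
           ;  // iterate until no single change helps
       }

   The result is a local optimum of the cost function, not necessarily a
   global one.  */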
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
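
/* For illustration, on a hypothetical source loop: if the chosen set
   contains a single pointer candidate p for the uses of i in

       for (i = 0; i < n; i++)
         a[i] = 0;

   then create_new_ivs materializes p, and after the uses are rewritten
   the loop is roughly equivalent to

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   with cand->var_before and cand->var_after being the ssa versions of p
   at the loop header and after the increment, respectively.  */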
/* Returns the phi-node in BB with result RESULT.  */

static gimple
get_phi_with_result (basic_block bb, tree result)
{
  gimple_stmt_iterator i = gsi_start_phis (bb);

  for (; !gsi_end_p (i); gsi_next (&i))
    if (gimple_phi_result (gsi_stmt (i)) == result)
      return gsi_stmt (i);

  gcc_unreachable ();
  return NULL;
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (gimple stmt, bool including_defined_name)
{
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
                                           gimple_phi_result (stmt));
      gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
      remove_phi_node (&bsi, including_defined_name);
    }
  else
    {
      gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
      remove_statement (use->stmt, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
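
/* For illustration: the IP_ORIGINAL special case above fires when the use
   is the original increment statement itself, e.g. the gimple

       i_7 = i_4 + 1;   (var_before is i_4, var_after is i_7)

   Expressing i_7 via the candidate it defines must not introduce a new
   computation; since the second operand is the constant step, the early
   return keeps the statement unchanged.  */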
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
         tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
        return tag;

      return var;
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
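
/* For illustration, on a hypothetical store: the address in

       a[i_4] = 0;

   is expressed as an affine combination of the candidate and invariants,
   and create_mem_ref emits a TARGET_MEM_REF, printed in dumps roughly as

       MEM[symbol: a, index: i_4] = 0;

   with the exact decomposition (symbol, base, index, step, offset) chosen
   to fit the target's addressing modes.  */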
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
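
/* For illustration, on a hypothetical exit test: when elimination succeeds,
   the comparison

       if (i_4 < 100)

   is rewritten against the selected candidate and a precomputed bound,
   e.g. with a pointer candidate p roughly

       if (p_7 != &a[100])

   Otherwise the fallback path recomputes the original iv's value from the
   candidate and substitutes it into the condition.  */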
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_use *use;
  struct iv_cand *cand;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
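
/* For illustration, the pipeline above can be observed on a small test
   case; assuming the usual dump flags, compiling

       void f (char *a, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = 0;
       }

   with -O2 -fdump-tree-ivopts-details dumps the uses, candidates and cost
   tables from items 1-3 and the rewritten loop from item 4.  */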
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}