/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variables value to
	 be usable for the given purpose (adding base and offset for arrays,
	 etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
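/* As a small illustration (an editor's example, not a transcript of the
   pass), in a loop like

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address a + 4 * i (for 4-byte elements) and the exit test i < 100
   are the interesting uses; i itself is one candidate, and a pointer
   walked from a to a + 400 in steps of 4 is another.  The pass picks
   whichever set of such variables makes the uses cheapest overall.  */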
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

static edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
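/* A worked example: with BITS = 4 (arithmetic modulo 16), A = 6 and
   B = 10, halving both once gives a = 3 and b = 5.  The inverse of 5 is
   5^7 = 13 (mod 16), so X = 3 * 13 = 7 (mod 16), and indeed
   7 * 10 = 70 = 6 (mod 16).  With A = 6 and B = 4, b stays even after
   the halvings while a becomes odd, so no such X exists.  */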
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Element of the table in that we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};
/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  void **slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter,
						     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
				sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
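/* For example, for the address &a[i] + 4 this returns &a (folded to a
   void * constant), while for a plain integer constant, or a pointer
   whose base cannot be traced, it returns NULL_TREE.  */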
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = xcalloc (1, sizeof (struct iv));

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step, true))
    return NULL_TREE;

  if (zero_p (step))
    return NULL_TREE;

  return step;
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      break;
    }

  return false;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  unsigned i;
  VEC (tree, heap) *loop_phis;

  /* For allowing scev to remove some loop phi nodes in
     unify_peeled_chrec, we have to compute the scev information
     before assuming that each phi node is an induction variable.  */
  loop_phis = VEC_alloc (tree, heap, 2);
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      VEC_safe_push (tree, heap, loop_phis, phi);
    }
  for (i = 0; VEC_iterate(tree, loop_phis, i, phi); i++)
    determine_biv_step (phi);
  VEC_free (tree, heap, loop_phis);

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
			tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
    return false;

  *base = expand_simple_operations (*base);
  if (contains_abnormal_ssa_name_p (*base)
      || contains_abnormal_ssa_name_p (*step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  tree base, step;

  if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), base, step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "  may be zero if ");
	  print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it
   as TYPE.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}

/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
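/* E.g. in "for (i = 0; i < n; i++)" with n loop-invariant, the exit
   condition i < n compares an iv with an invariant, so it is recorded
   as a USE_COMPARE use based on i; this is what later permits rewriting
   the test in terms of another iv or eliminating i altogether.  */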
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
			  sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
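/* E.g. for &a[i + 3] with 4-byte array elements this returns &a[i]
   and stores 12 to *OFFSET.  */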
/* Returns variant of TYPE that can be used as base for different uses.
   For integer types, we return unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
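/* E.g. generic_type_for (int) is unsigned int, and generic_type_for of
   any pointer type is void *.  */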
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2197 add_old_ivs_candidates (struct ivopts_data
*data
)
2203 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2205 iv
= ver_info (data
, i
)->iv
;
2206 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2207 add_old_iv_candidates (data
, iv
);
2211 /* Adds candidates based on the value of the induction variable IV and USE. */
2214 add_iv_value_candidates (struct ivopts_data
*data
,
2215 struct iv
*iv
, struct iv_use
*use
)
2217 unsigned HOST_WIDE_INT offset
;
2220 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2222 /* The same, but with initial value zero. Make such variable important,
2223 since it is generic enough so that possibly many uses may be based
2225 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2226 iv
->step
, true, use
);
2228 /* Third, try removing the constant offset. */
2229 base
= strip_offset (iv
->base
, &offset
);
2231 add_candidate (data
, base
, iv
->step
, false, use
);
2234 /* Possibly adds pseudocandidate for replacing the final value of USE by
2235 a direct computation. */
2238 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
2240 struct tree_niter_desc
*niter
;
2242 /* We must know where we exit the loop and how many times does it roll. */
2243 niter
= niter_for_single_dom_exit (data
);
2245 || !zero_p (niter
->may_be_zero
))
2248 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
2251 /* Adds candidates based on the uses. */
2254 add_derived_ivs_candidates (struct ivopts_data
*data
)
2258 for (i
= 0; i
< n_iv_uses (data
); i
++)
2260 struct iv_use
*use
= iv_use (data
, i
);
2267 case USE_NONLINEAR_EXPR
:
2270 /* Just add the ivs based on the value of the iv used here. */
2271 add_iv_value_candidates (data
, use
->iv
, use
);
2275 add_iv_value_candidates (data
, use
->iv
, use
);
2277 /* Additionally, add the pseudocandidate for the possibility to
2278 replace the final value by a direct computation. */
2279 add_iv_outer_candidates (data
, use
);
2288 /* Record important candidates and add them to related_cands bitmaps
2292 record_important_candidates (struct ivopts_data
*data
)
2297 for (i
= 0; i
< n_iv_cands (data
); i
++)
2299 struct iv_cand
*cand
= iv_cand (data
, i
);
2301 if (cand
->important
)
2302 bitmap_set_bit (data
->important_candidates
, i
);
2305 data
->consider_all_candidates
= (n_iv_cands (data
)
2306 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2308 if (data
->consider_all_candidates
)
2310 /* We will not need "related_cands" bitmaps in this case,
2311 so release them to decrease peak memory consumption. */
2312 for (i
= 0; i
< n_iv_uses (data
); i
++)
2314 use
= iv_use (data
, i
);
2315 BITMAP_FREE (use
->related_cands
);
2320 /* Add important candidates to the related_cands bitmaps. */
2321 for (i
= 0; i
< n_iv_uses (data
); i
++)
2322 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2323 data
->important_candidates
);
2327 /* Finds the candidates for the induction variables. */
2330 find_iv_candidates (struct ivopts_data
*data
)
2332 /* Add commonly used ivs. */
2333 add_standard_iv_candidates (data
);
2335 /* Add old induction variables. */
2336 add_old_ivs_candidates (data
);
2338 /* Add induction variables derived from uses. */
2339 add_derived_ivs_candidates (data
);
2341 /* Record the important candidates. */
2342 record_important_candidates (data
);
2345 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2346 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2347 we allocate a simple list to every use. */
2350 alloc_use_cost_map (struct ivopts_data
*data
)
2352 unsigned i
, size
, s
, j
;
2354 for (i
= 0; i
< n_iv_uses (data
); i
++)
2356 struct iv_use
*use
= iv_use (data
, i
);
2359 if (data
->consider_all_candidates
)
2360 size
= n_iv_cands (data
);
2364 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2369 /* Round up to the power of two, so that moduling by it is fast. */
2370 for (size
= 1; size
< s
; size
<<= 1)
2374 use
->n_map_members
= size
;
2375 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2379 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2380 on invariants DEPENDS_ON and that the value used in expressing it
2384 set_use_iv_cost (struct ivopts_data
*data
,
2385 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2386 bitmap depends_on
, tree value
)
2392 BITMAP_FREE (depends_on
);
2396 if (data
->consider_all_candidates
)
2398 use
->cost_map
[cand
->id
].cand
= cand
;
2399 use
->cost_map
[cand
->id
].cost
= cost
;
2400 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2401 use
->cost_map
[cand
->id
].value
= value
;
2405 /* n_map_members is a power of two, so this computes modulo. */
2406 s
= cand
->id
& (use
->n_map_members
- 1);
2407 for (i
= s
; i
< use
->n_map_members
; i
++)
2408 if (!use
->cost_map
[i
].cand
)
2410 for (i
= 0; i
< s
; i
++)
2411 if (!use
->cost_map
[i
].cand
)
2417 use
->cost_map
[i
].cand
= cand
;
2418 use
->cost_map
[i
].cost
= cost
;
2419 use
->cost_map
[i
].depends_on
= depends_on
;
2420 use
->cost_map
[i
].value
= value
;
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

static int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
        return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
                                      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
        return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
        return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
         the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
        {
          negate = true;
          bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
        }
      else
        negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
        {
          negate = !negate;
          top = fold_unary_to_constant (NEGATE_EXPR, type, top);
        }

      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
        return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
        res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}
/* Sets COMB to CST.  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
                       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}
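/* Note on the mask: for a PREC-bit type it is 2^PREC - 1, i.e. all value
   bits set.  It is computed as ((unsigned HOST_WIDE_INT) 2 << (PREC - 1)) - 1
   rather than as (1 << PREC) - 1 so that the shift amount stays below the
   host word width even when PREC equals HOST_BITS_PER_WIDE_INT.  */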
/* Sets COMB to single element ELT.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}
/* Scales COMB by SCALE.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
                       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  scale &= comb->mask;
  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
        j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
        {
          comb->coefs[comb->n] = scale;
          comb->elts[comb->n] = comb->rest;
          comb->rest = NULL_TREE;
          comb->n++;
        }
      else
        comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
                                  build_int_cst_type (comb->type, scale));
    }
}
/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
                         unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
        comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
        if (comb->coefs[i])
          return;

        comb->n--;
        comb->coefs[i] = comb->coefs[comb->n];
        comb->elts[i] = comb->elts[comb->n];

        if (comb->rest)
          {
            gcc_assert (comb->n == MAX_AFF_ELTS - 1);
            comb->coefs[comb->n] = 1;
            comb->elts[comb->n] = comb->rest;
            comb->rest = NULL_TREE;
            comb->n++;
          }
        return;
      }

  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
                       fold_convert (comb->type, elt),
                       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
                     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}
/* Splits EXPR into an affine combination of parts.  */

static void
tree_to_aff_combination (tree expr, tree type,
                         struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
        aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
        break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
                                  &toffset, &mode, &unsignedp, &volatilep,
                                  false);
      if (bitpos % BITS_PER_UNIT != 0)
        break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
        aff_combination_add_elt (comb, core, 1);
      else
        {
          tree_to_aff_combination (core, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      if (toffset)
        {
          tree_to_aff_combination (toffset, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      return;

    default:
      break;
    }

  aff_combination_elt (comb, type, expr);
}
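/* An illustrative decomposition (not from the original comments): the
   expression a + 4 * b + 7 is split into offset 7 and two elements,
   a with coefficient 1 and b with coefficient 4.  An expression the switch
   above cannot decompose further ends up as a single element with
   coefficient 1 via aff_combination_elt.  */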
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
                 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
        return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  if (scale == mask)
    {
      if (!expr)
        return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
                        build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
                     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}
/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
                            comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
                          comb->mask);
}
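/* The sign of the offset is recovered from the mask: with a 32-bit mask of
   0xffffffff, an offset of 0xfffffffc has all the high bits of the mask set,
   so it is treated as -4 and emitted as a subtraction rather than as an
   addition of a huge unsigned constant.  (Illustrative values.)  */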
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
         considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
        cstep_orig = cstep;
    }

  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
        {
          /* TODO maybe consider case when ustep divides cstep and the ratio is
             a power of 2 (so that the division is fast to execute)?  We would
             need to be much more careful with overflows etc. then.  */
          return false;
        }

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
        return false;

      /* Ratioi is only used to detect special cases when the multiplicative
         factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
         we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
         to integer_onep/integer_all_onesp, since the former ignores
         TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
        ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
        ratioi = 1;
      else if (integer_all_onesp (ratio))
        ratioi = -1;
      else
        ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */
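  /* As a concrete illustration (hypothetical values): if the use is the byte
     offset 4 * i with ubase 0 and ustep 4, and the candidate is i itself
     with cbase 0 and cstep 1, then ratio = 4 and the formula above yields
     use = 0 - 4 * 0 + 4 * var, i.e. simply 4 * var.  */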
  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
         should not be very common, and fold itself is not that bad either,
         so making the aff. functions more complicated to handle this case
         is not that urgent.  */
      if (ratioi == 1)
        {
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
        }
      else if (ratioi == -1)
        {
          delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
        }
      else
        {
          delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
          expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
          expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
        }

      aff_combination_const (aff, uutype, 0);
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}
/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}
/* Returns cost of multiplication by constant CST in MODE.  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in address.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (memory_address_p (Pmode, addr))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.  */
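/* For instance (illustrative only), the access *(p + 16 + 4 * i) has no
   symbol, the variable p present, OFFSET 16 and RATIO 4; a reference to a
   global array g[i] would instead have the symbol present, with RATIO given
   by the element size.  */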
static unsigned
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %d\n", (int) min_offset);
          fprintf (dump_file, "  max offset %d\n", (int) max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i))
          {
            rat = i;
            break;
          }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
              && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
        addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
        {
          base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
          if (offset_p)
            base = gen_rtx_fmt_e (CONST, Pmode,
                                  gen_rtx_fmt_ee (PLUS, Pmode,
                                                  base,
                                                  gen_int_mode (off, Pmode)));
        }
      else if (offset_p)
        base = gen_int_mode (off, Pmode);
      else
        base = NULL_RTX;

      if (base)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
         follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
        acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
/* Estimates cost of forcing expression EXPR into a variable.  */

static unsigned
force_expr_to_var_cost (tree expr)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
                           gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
                                                           2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
        = computation_cost (build2 (PLUS_EXPR, type,
                                    addr,
                                    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "force_expr_to_var_cost:\n");
          fprintf (dump_file, "  integer %d\n", (int) integer_cost);
          fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
          fprintf (dump_file, "  address %d\n", (int) address_cost);
          fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
          fprintf (dump_file, "\n");
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return symbol_cost;
        }

      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = 0;
      else
        cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
        cost1 = 0;
      else
        cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
        cost = multiply_by_cost (int_cst_value (op1), mode);
      else
        return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
        return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);
      if (!rat)
        return INFTY;

      if (cst_and_fits_in_hwi (rat))
        ratio = int_cst_value (rat);
      else if (integer_onep (rat))
        ratio = 1;
      else if (integer_all_onesp (rat))
        ratio = -1;
      else
        return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
                               ubase, cbase,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset,
                                    ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Computes value of induction variable IV in iteration NITER.  */

static tree
iv_value (struct iv *iv, tree niter)
{
  tree val;
  tree type = TREE_TYPE (iv->base);

  niter = fold_convert (type, niter);
  val = fold_build2 (MULT_EXPR, type, iv->step, niter);

  return fold_build2 (PLUS_EXPR, type, iv->base, val);
}
/* Computes value of candidate CAND at position AT in iteration NITER.  */

static tree
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
{
  tree val = iv_value (cand->iv, niter);
  tree type = TREE_TYPE (cand->iv->base);

  if (stmt_after_increment (loop, cand, at))
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);

  return val;
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
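/* Example (not from the original comments): for a 32-bit iv with step 4,
   num_ending_zeros is 2, so the period is the 30-bit mask 2^30 - 1 -- the
   iv assumes 2^30 distinct values before returning to its initial one.  */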
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  struct tree_niter_desc *niter;
  tree nit, nit_type;
  tree wider_type, period, per_type;
  struct loop *loop = data->current_loop;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.  TODO -- extend
     for other conditions inside loop body.  */
  ex_bb = bb_for_stmt (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || TREE_CODE (use->stmt) != COND_EXPR)
    return false;
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  niter = niter_for_exit (data, exit);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  nit = niter->niter;
  nit_type = TREE_TYPE (nit);

  /* Determine whether we may use the variable to test whether niter iterations
     elapsed.  This is the case iff the period of the induction variable is
     greater than the number of iterations.  */
  period = iv_period (cand->iv);
  if (!period)
    return false;
  per_type = TREE_TYPE (period);

  wider_type = TREE_TYPE (period);
  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
    wider_type = per_type;
  else
    wider_type = nit_type;

  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                      fold_convert (wider_type, period),
                                      fold_convert (wider_type, nit))))
    return false;

  *bound = cand_value_at (loop, cand, use->stmt, nit);
  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
        op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
        {
          op = get_iv (data, op)->base;
          fd_ivopts_data = data;
          walk_tree (&op, find_depends, &depends_on, NULL);
        }
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
                         tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
                              bb_for_stmt (use->stmt)));

  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  *value = iv_value (use->iv, niter->niter);

  return true;
}
/* Determines cost of replacing final value of USE using CAND.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
                             struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
        {
          set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
          return false;
        }

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
         after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
                                      last_stmt (exit->src));
      if (cost != INFTY)
        cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_OUTER:
      return determine_use_iv_cost_outer (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || use->cost_map[j].cost == INFTY)
                continue;

              fprintf (dump_file, "  %d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     optimization passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  return global_cost_for_size (size,
                               loop_data (data->current_loop)->regs_used,
                               n_iv_uses (data));
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
        make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
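  /* A worked instance of the model (hypothetical numbers): with A = 8
     available registers, reserve R = 3 and U = 4 registers used by the
     loop, a set of I = 1 ivs satisfies U + I + R <= A and is charged
     1 * SMALL_COST; I = 2 gives U + I + R = 9 > A but U + I = 6 <= A,
     so the charge is 2 * PRES_COST; from I = 5 on, U + I exceeds A and
     the spill term is added as well.  */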
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (a->cost > b->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned cost = 0;

  cost += ivs->cand_use_cost;
  cost += ivs->cand_cost;
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}
/* Remove invariants in set INVS to set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Returns candidate by that USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
  nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
  nw->cost = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return INFTY;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
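  /* For illustration (a hypothetical loop): with uses a[4*i], b[8*i] and the
     exit test i < n, a single important candidate with step 1 can express
     all three uses, whereas starting from per-use candidates with steps 4
     and 8 would pay a separate increment for each and tends to trap the
     search in that shape.  */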
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
5165 /* Tries to improve set of induction variables IVS. */
5168 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5170 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5171 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5172 struct iv_cand
*cand
;
  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }
  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;
  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
        {
          /* Prevent the ssa name defined by the statement from being
             removed.  */
          SET_PHI_RESULT (stmt, NULL);
        }
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
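  /* E.g. (an illustrative sketch, not from the original sources): for the
     original biv increment

         i_7 = i_3 + 1;

     with cand->var_before == i_3 and cand->var_after == i_7, the statement
     already computes the candidate's value, so the code below returns
     without emitting anything.  */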
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
          || TREE_CODE (op) == MINUS_EXPR)
        {
          if (TREE_OPERAND (op, 0) == cand->var_before)
            op = TREE_OPERAND (op, 1);
          else if (TREE_CODE (op) == PLUS_EXPR
                   && TREE_OPERAND (op, 1) == cand->var_before)
            op = TREE_OPERAND (op, 0);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;
      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
        {
          bsi = pbsi;
          bsi_next (&pbsi);
        }
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways in which this information may be stored, and what precisely
   its semantics is depends on the type of the reference, but there is
   always one _DECL node hidden somewhere that is used to determine the set
   of virtual operands for the reference.  The code below deciphers this
   jungle and extracts this single useful piece of information.  */
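/* For instance (an illustrative sketch, not from the original sources):
   for a reference *p_1 the tag is the name mem tag of p_1 if it has one,
   and the type mem tag of the base pointer otherwise; for a reference such
   as x.a[i].b it is the type mem tag of x if present, and the _DECL node
   of x itself otherwise.  */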
static tree
get_ref_tag (tree ref)
{
  tree var = get_base_address (ref);
  tree tag;

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* In case the base is a dereference of a pointer, first check its name
         mem tag, and if it does not have one, use type mem tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->type_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      tag = var_ann (var)->type_mem_tag;
      if (tag)
        return tag;

      return var;
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_TAG (new_ref) = get_ref_tag (old_ref);
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
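/* An illustration (a hedged sketch, not part of the original sources):
   given a candidate pointer iv, an access a[i] inside the loop may be
   replaced here by a TARGET_MEM_REF that encodes the base, index, step and
   offset of the affine combination directly, so that it can later be
   matched against the target's addressing modes; copy_ref_info carries the
   alias information over to the new reference.  */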
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
                                 true, NULL_TREE);

      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }
  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  SET_USE (op_p, PHI_RESULT (phi));
}
/* Ensure that operands of STMT may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form (edge exit, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    protect_loop_closed_ssa_form_use (exit, use_p);
}
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange
   things so that they are emitted on the correct place, and so that the loop
   closed ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
        {
          bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
          protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
        }
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
        {
          def = PHI_RESULT (phi);
          remove_statement (phi, false);
          stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
                         def, op);
          SSA_NAME_DEF_STMT (def) = stmt;
          bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
        }
    }
}
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
                   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      if (!cand->iv)
        {
          struct cost_pair *cp = get_use_iv_cost (data, use, cand);

          value = unshare_expr (cp->value);
        }
      else
        value = get_computation_at (data->current_loop,
                                    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
      /* If we will preserve the iv anyway and we would need to perform
         some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
        return;

      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
        {
          use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

          if (USE_FROM_PTR (use_p) == tgt)
            SET_USE (use_p, op);
        }

      if (stmts)
        compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
         since we may still need the aliasing information attached to the
         ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }
  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data,
             struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_OUTER:
      rewrite_use_outer (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !zero_p (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = xcalloc (data->version_info_size,
                                    sizeof (struct version_info));
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
        free (loops->parray[i]->aux);
        loops->parray[i]->aux = NULL;
      }

  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
        {
          loop = loop->next;
          while (loop->inner)
            loop = loop->inner;
        }
      else
        loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (loops, &data);
}