1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
139 /* The candidate - cost pair. */
142 struct iv_cand
*cand
; /* The candidate. */
143 unsigned cost
; /* The cost. */
144 bitmap depends_on
; /* The list of invariants that have to be
146 tree value
; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
154 unsigned id
; /* The id of the use. */
155 enum use_type type
; /* Type of the use. */
156 struct iv
*iv
; /* The induction variable it is based on. */
157 tree stmt
; /* Statement in that it occurs. */
158 tree
*op_p
; /* The place where it occurs. */
159 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
163 struct cost_pair
*cost_map
;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand
*selected
;
167 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
178 /* The induction variable candidate. */
181 unsigned id
; /* The number of the candidate. */
182 bool important
; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos
; /* Where it is computed. */
185 tree incremented_at
; /* For original biv, the statement where it is
187 tree var_before
; /* The variable used for it before increment. */
188 tree var_after
; /* The variable used for it after increment. */
189 struct iv
*iv
; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost
; /* Cost of the candidate. */
194 bitmap depends_on
; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use
*iv_use_p
;
202 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
204 typedef struct iv_cand
*iv_cand_p
;
205 DEF_VEC_P(iv_cand_p
);
206 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
210 /* The currently optimized loop. */
211 struct loop
*current_loop
;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size
;
219 /* The array of information for the ssa names. */
220 struct version_info
*version_info
;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p
,heap
) *iv_uses
;
231 /* The candidates. */
232 VEC(iv_cand_p
,heap
) *iv_candidates
;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates
;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates
;
242 /* An assignment of iv candidates to uses. */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair
**cand_for_use
;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses
;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost
;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses
;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair
*old_cp
;
290 /* A new assignment. */
291 struct cost_pair
*new_cp
;
293 /* Next change in the list. */
294 struct iv_ca_delta
*next_change
;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for which the decl_rtl field must be reset is stored
317 static VEC(tree
,heap
) *decl_rtl_to_reset
;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data
*data
)
324 return VEC_length (iv_use_p
, data
->iv_uses
);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use
*
330 iv_use (struct ivopts_data
*data
, unsigned i
)
332 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data
*data
)
340 return VEC_length (iv_cand_p
, data
->iv_candidates
);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand
*
346 iv_cand (struct ivopts_data
*data
, unsigned i
)
348 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
351 /* The data for LOOP. */
353 static inline struct loop_data
*
354 loop_data (struct loop
*loop
)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop
*loop
)
364 edge exit
= loop
->single_exit
;
369 if (!just_once_each_iteration_p (loop
, exit
->src
))
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv
*);
379 dump_iv (FILE *file
, struct iv
*iv
)
383 fprintf (file
, "ssa name ");
384 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
385 fprintf (file
, "\n");
388 fprintf (file
, " type ");
389 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
390 fprintf (file
, "\n");
394 fprintf (file
, " base ");
395 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
396 fprintf (file
, "\n");
398 fprintf (file
, " step ");
399 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
400 fprintf (file
, "\n");
404 fprintf (file
, " invariant ");
405 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
406 fprintf (file
, "\n");
411 fprintf (file
, " base object ");
412 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
413 fprintf (file
, "\n");
417 fprintf (file
, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use
*);
424 dump_use (FILE *file
, struct iv_use
*use
)
426 fprintf (file
, "use %d\n", use
->id
);
430 case USE_NONLINEAR_EXPR
:
431 fprintf (file
, " generic\n");
435 fprintf (file
, " outside\n");
439 fprintf (file
, " address\n");
443 fprintf (file
, " compare\n");
450 fprintf (file
, " in statement ");
451 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
452 fprintf (file
, "\n");
454 fprintf (file
, " at position ");
456 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
457 fprintf (file
, "\n");
459 dump_iv (file
, use
->iv
);
461 if (use
->related_cands
)
463 fprintf (file
, " related candidates ");
464 dump_bitmap (file
, use
->related_cands
);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data
*);
472 dump_uses (FILE *file
, struct ivopts_data
*data
)
477 for (i
= 0; i
< n_iv_uses (data
); i
++)
479 use
= iv_use (data
, i
);
481 dump_use (file
, use
);
482 fprintf (file
, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand
*);
490 dump_cand (FILE *file
, struct iv_cand
*cand
)
492 struct iv
*iv
= cand
->iv
;
494 fprintf (file
, "candidate %d%s\n",
495 cand
->id
, cand
->important
? " (important)" : "");
497 if (cand
->depends_on
)
499 fprintf (file
, " depends on ");
500 dump_bitmap (file
, cand
->depends_on
);
505 fprintf (file
, " final value replacement\n");
512 fprintf (file
, " incremented before exit test\n");
516 fprintf (file
, " incremented at end\n");
520 fprintf (file
, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info
*
530 ver_info (struct ivopts_data
*data
, unsigned ver
)
532 return data
->version_info
+ ver
;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info
*
538 name_info (struct ivopts_data
*data
, tree name
)
540 return ver_info (data
, SSA_NAME_VERSION (name
));
543 /* Checks whether there exists a number X such that X * B = A, counting modulo
547 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
550 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
551 unsigned HOST_WIDE_INT inv
, ex
, val
;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a
& 1) && !(b
& 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i
= 0; i
< bits
- 1; i
++)
578 inv
= (inv
* ex
) & mask
;
579 ex
= (ex
* ex
) & mask
;
582 val
= (a
* inv
) & mask
;
584 gcc_assert (((val
* b
) & mask
) == a
);
586 if ((val
>> (bits
- 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
598 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
600 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
604 if (sbb
== loop
->latch
)
610 return stmt
== last_stmt (bb
);
613 /* Returns true if STMT is after the place where the original induction
614 variable CAND is incremented. */
617 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
619 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
620 basic_block stmt_bb
= bb_for_stmt (stmt
);
621 block_stmt_iterator bsi
;
623 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
626 if (stmt_bb
!= cand_bb
)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
633 if (bsi_stmt (bsi
) == cand
->incremented_at
)
635 if (bsi_stmt (bsi
) == stmt
)
640 /* Returns true if STMT is after the place where the induction variable
641 CAND is incremented in LOOP. */
644 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
652 return stmt_after_ip_normal_pos (loop
, stmt
);
655 return stmt_after_ip_original_pos (cand
, stmt
);
662 /* Element of the table in which we cache the numbers of iterations obtained
663 from exits of the loop. */
667 /* The edge for which the number of iterations is cached. */
670 /* True if the # of iterations was successfully determined. */
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter
;
677 /* Hash function for nfe_cache_elt E. */
680 nfe_hash (const void *e
)
682 const struct nfe_cache_elt
*elt
= e
;
684 return htab_hash_pointer (elt
->exit
);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
690 nfe_eq (const void *e1
, const void *e2
)
692 const struct nfe_cache_elt
*elt1
= e1
;
694 return elt1
->exit
== e2
;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc
*
701 niter_for_exit (struct ivopts_data
*data
, edge exit
)
703 struct nfe_cache_elt
*nfe_desc
;
706 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
707 htab_hash_pointer (exit
),
712 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
713 nfe_desc
->exit
= exit
;
714 nfe_desc
->valid_p
= number_of_iterations_exit (data
->current_loop
,
715 exit
, &nfe_desc
->niter
,
722 if (!nfe_desc
->valid_p
)
725 return &nfe_desc
->niter
;
728 /* Returns structure describing number of iterations determined from
729 single dominating exit of DATA->current_loop, or NULL if something
732 static struct tree_niter_desc
*
733 niter_for_single_dom_exit (struct ivopts_data
*data
)
735 edge exit
= single_dom_exit (data
->current_loop
);
740 return niter_for_exit (data
, exit
);
743 /* Initializes data structures used by the iv optimization pass, stored
744 in DATA. LOOPS is the loop tree. */
747 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
751 data
->version_info_size
= 2 * num_ssa_names
;
752 data
->version_info
= xcalloc (data
->version_info_size
,
753 sizeof (struct version_info
));
754 data
->relevant
= BITMAP_ALLOC (NULL
);
755 data
->important_candidates
= BITMAP_ALLOC (NULL
);
756 data
->max_inv_id
= 0;
757 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
759 for (i
= 1; i
< loops
->num
; i
++)
760 if (loops
->parray
[i
])
761 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
763 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
764 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
765 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
768 /* Returns a memory object to which EXPR points. In case we are able to
769 determine that it does not point to any such object, NULL is returned. */
772 determine_base_object (tree expr
)
774 enum tree_code code
= TREE_CODE (expr
);
775 tree base
, obj
, op0
, op1
;
777 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
786 obj
= TREE_OPERAND (expr
, 0);
787 base
= get_base_address (obj
);
792 if (TREE_CODE (base
) == INDIRECT_REF
)
793 return determine_base_object (TREE_OPERAND (base
, 0));
795 return fold_convert (ptr_type_node
,
796 build_fold_addr_expr (base
));
800 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
801 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
807 return (code
== PLUS_EXPR
809 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
811 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
815 return determine_base_object (TREE_OPERAND (expr
, 0));
818 return fold_convert (ptr_type_node
, expr
);
822 /* Allocates an induction variable with given initial value BASE and step STEP
826 alloc_iv (tree base
, tree step
)
828 struct iv
*iv
= xcalloc (1, sizeof (struct iv
));
830 if (step
&& integer_zerop (step
))
834 iv
->base_object
= determine_base_object (base
);
837 iv
->have_use_for
= false;
839 iv
->ssa_name
= NULL_TREE
;
844 /* Sets STEP and BASE for induction variable IV. */
847 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
849 struct version_info
*info
= name_info (data
, iv
);
851 gcc_assert (!info
->iv
);
853 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
854 info
->iv
= alloc_iv (base
, step
);
855 info
->iv
->ssa_name
= iv
;
858 /* Finds induction variable declaration for VAR. */
861 get_iv (struct ivopts_data
*data
, tree var
)
865 if (!name_info (data
, var
)->iv
)
867 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
870 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
871 set_iv (data
, var
, var
, NULL_TREE
);
874 return name_info (data
, var
)->iv
;
877 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
878 not define a simple affine biv with nonzero step. */
881 determine_biv_step (tree phi
)
883 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
884 tree name
= PHI_RESULT (phi
), base
, step
;
886 if (!is_gimple_reg (name
))
889 if (!simple_iv (loop
, phi
, name
, &base
, &step
, true))
898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
901 abnormal_ssa_name_p (tree exp
)
906 if (TREE_CODE (exp
) != SSA_NAME
)
909 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
913 abnormal phi node. Callback for for_each_index. */
916 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
917 void *data ATTRIBUTE_UNUSED
)
919 if (TREE_CODE (base
) == ARRAY_REF
)
921 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
923 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
927 return !abnormal_ssa_name_p (*index
);
930 /* Returns true if EXPR contains a ssa name that occurs in an
931 abnormal phi node. */
934 contains_abnormal_ssa_name_p (tree expr
)
937 enum tree_code_class
class;
942 code
= TREE_CODE (expr
);
943 class = TREE_CODE_CLASS (code
);
945 if (code
== SSA_NAME
)
946 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
948 if (code
== INTEGER_CST
949 || is_gimple_min_invariant (expr
))
952 if (code
== ADDR_EXPR
)
953 return !for_each_index (&TREE_OPERAND (expr
, 0),
954 idx_contains_abnormal_ssa_name_p
,
961 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
966 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
978 /* Finds basic ivs. */
981 find_bivs (struct ivopts_data
*data
)
983 tree phi
, step
, type
, base
;
985 struct loop
*loop
= data
->current_loop
;
987 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
989 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
992 step
= determine_biv_step (phi
);
996 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
997 base
= expand_simple_operations (base
);
998 if (contains_abnormal_ssa_name_p (base
)
999 || contains_abnormal_ssa_name_p (step
))
1002 type
= TREE_TYPE (PHI_RESULT (phi
));
1003 base
= fold_convert (type
, base
);
1005 step
= fold_convert (type
, step
);
1007 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1014 /* Marks basic ivs. */
1017 mark_bivs (struct ivopts_data
*data
)
1020 struct iv
*iv
, *incr_iv
;
1021 struct loop
*loop
= data
->current_loop
;
1022 basic_block incr_bb
;
1024 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1026 iv
= get_iv (data
, PHI_RESULT (phi
));
1030 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1031 incr_iv
= get_iv (data
, var
);
1035 /* If the increment is in the subloop, ignore it. */
1036 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1037 if (incr_bb
->loop_father
!= data
->current_loop
1038 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1042 incr_iv
->biv_p
= true;
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047 parameters to BASE and STEP. */
1050 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
,
1051 tree
*base
, tree
*step
)
1054 struct loop
*loop
= data
->current_loop
;
1059 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1062 lhs
= TREE_OPERAND (stmt
, 0);
1063 if (TREE_CODE (lhs
) != SSA_NAME
)
1066 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), base
, step
, true))
1068 *base
= expand_simple_operations (*base
);
1070 if (contains_abnormal_ssa_name_p (*base
)
1071 || contains_abnormal_ssa_name_p (*step
))
1077 /* Finds general ivs in statement STMT. */
1080 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1084 if (!find_givs_in_stmt_scev (data
, stmt
, &base
, &step
))
1087 set_iv (data
, TREE_OPERAND (stmt
, 0), base
, step
);
1090 /* Finds general ivs in basic block BB. */
1093 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1095 block_stmt_iterator bsi
;
1097 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1098 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1101 /* Finds general ivs. */
1104 find_givs (struct ivopts_data
*data
)
1106 struct loop
*loop
= data
->current_loop
;
1107 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1110 for (i
= 0; i
< loop
->num_nodes
; i
++)
1111 find_givs_in_bb (data
, body
[i
]);
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116 variable and if so, its initial value and step. */
1119 find_induction_variables (struct ivopts_data
*data
)
1124 if (!find_bivs (data
))
1130 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1132 struct tree_niter_desc
*niter
;
1134 niter
= niter_for_single_dom_exit (data
);
1138 fprintf (dump_file
, " number of iterations ");
1139 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1140 fprintf (dump_file
, "\n");
1142 fprintf (dump_file
, " may be zero if ");
1143 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1144 fprintf (dump_file
, "\n");
1145 fprintf (dump_file
, "\n");
1148 fprintf (dump_file
, "Induction variables:\n\n");
1150 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1152 if (ver_info (data
, i
)->iv
)
1153 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1162 static struct iv_use
*
1163 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1164 tree stmt
, enum use_type use_type
)
1166 struct iv_use
*use
= xcalloc (1, sizeof (struct iv_use
));
1168 use
->id
= n_iv_uses (data
);
1169 use
->type
= use_type
;
1173 use
->related_cands
= BITMAP_ALLOC (NULL
);
1175 /* To avoid showing ssa name in the dumps, if it was not reset by the
1177 iv
->ssa_name
= NULL_TREE
;
1179 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1180 dump_use (dump_file
, use
);
1182 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188 NONLINEAR_USE is true if the invariant is used in a way we do not
1189 handle specially. */
1192 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1195 struct version_info
*info
;
1197 if (TREE_CODE (op
) != SSA_NAME
1198 || !is_gimple_reg (op
))
1201 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1203 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1206 info
= name_info (data
, op
);
1208 info
->has_nonlin_use
|= nonlinear_use
;
1210 info
->inv_id
= ++data
->max_inv_id
;
1211 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1214 /* Checks whether the use OP is interesting and if so, records it
1217 static struct iv_use
*
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data
*data
, tree op
,
1226 if (TREE_CODE (op
) != SSA_NAME
)
1229 iv
= get_iv (data
, op
);
1233 if (iv
->have_use_for
)
1235 use
= iv_use (data
, iv
->use_id
);
1237 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
1238 || use
->type
== USE_OUTER
);
1240 if (type
== USE_NONLINEAR_EXPR
)
1241 use
->type
= USE_NONLINEAR_EXPR
;
1245 if (zero_p (iv
->step
))
1247 record_invariant (data
, op
, true);
1250 iv
->have_use_for
= true;
1252 civ
= xmalloc (sizeof (struct iv
));
1255 stmt
= SSA_NAME_DEF_STMT (op
);
1256 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1257 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1259 use
= record_use (data
, NULL
, civ
, stmt
, type
);
1260 iv
->use_id
= use
->id
;
1265 /* Checks whether the use OP is interesting and if so, records it. */
1267 static struct iv_use
*
1268 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1270 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_NONLINEAR_EXPR
);
1273 /* Records a definition of induction variable OP that is used outside of the
1276 static struct iv_use
*
1277 find_interesting_uses_outer (struct ivopts_data
*data
, tree op
)
1279 return find_interesting_uses_outer_or_nonlin (data
, op
, USE_OUTER
);
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283 and if so, records it. */
1286 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1290 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1292 tree zero
= integer_zero_node
;
1294 const_iv
.step
= NULL_TREE
;
1296 if (TREE_CODE (*cond_p
) != SSA_NAME
1297 && !COMPARISON_CLASS_P (*cond_p
))
1300 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1307 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1308 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1311 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1312 iv0
= get_iv (data
, *op0_p
);
1316 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1317 iv1
= get_iv (data
, *op1_p
);
1321 if (/* When comparing with non-invariant value, we may not do any senseful
1322 induction variable elimination. */
1324 /* Eliminating condition based on two ivs would be nontrivial.
1325 ??? TODO -- it is not really important to handle this case. */
1326 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1328 find_interesting_uses_op (data
, *op0_p
);
1329 find_interesting_uses_op (data
, *op1_p
);
1333 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1335 /* If both are invariants, this is a work for unswitching. */
1339 civ
= xmalloc (sizeof (struct iv
));
1340 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1341 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345 i.e. if all its operands are defined outside of the LOOP. */
1348 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1353 if (is_gimple_min_invariant (expr
))
1356 if (TREE_CODE (expr
) == SSA_NAME
)
1358 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1360 && flow_bb_inside_loop_p (loop
, def_bb
))
1369 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1370 for (i
= 0; i
< len
; i
++)
1371 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378 initial ones. Returns false when the value of the index cannot be determined.
1379 Callback for for_each_index. */
1381 struct ifs_ivopts_data
1383 struct ivopts_data
*ivopts_data
;
1389 idx_find_step (tree base
, tree
*idx
, void *data
)
1391 struct ifs_ivopts_data
*dta
= data
;
1393 tree step
, iv_step
, lbound
, off
;
1394 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1396 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1397 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1400 /* If base is a component ref, require that the offset of the reference
1402 if (TREE_CODE (base
) == COMPONENT_REF
)
1404 off
= component_ref_field_offset (base
);
1405 return expr_invariant_in_loop_p (loop
, off
);
1408 /* If base is array, first check whether we will be able to move the
1409 reference out of the loop (in order to take its address in strength
1410 reduction). In order for this to work we need both lower bound
1411 and step to be loop invariants. */
1412 if (TREE_CODE (base
) == ARRAY_REF
)
1414 step
= array_ref_element_size (base
);
1415 lbound
= array_ref_low_bound (base
);
1417 if (!expr_invariant_in_loop_p (loop
, step
)
1418 || !expr_invariant_in_loop_p (loop
, lbound
))
1422 if (TREE_CODE (*idx
) != SSA_NAME
)
1425 iv
= get_iv (dta
->ivopts_data
, *idx
);
1434 if (TREE_CODE (base
) == ARRAY_REF
)
1436 step
= array_ref_element_size (base
);
1438 /* We only handle addresses whose step is an integer constant. */
1439 if (TREE_CODE (step
) != INTEGER_CST
)
1443 /* The step for pointer arithmetics already is 1 byte. */
1444 step
= build_int_cst (sizetype
, 1);
1446 /* FIXME: convert_step should not be used outside chrec_convert: fix
1447 this by calling chrec_convert. */
1448 iv_step
= convert_step (dta
->ivopts_data
->current_loop
,
1449 sizetype
, iv
->base
, iv
->step
, dta
->stmt
);
1453 /* The index might wrap. */
1457 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1460 *dta
->step_p
= step
;
1462 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1467 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1471 idx_record_use (tree base
, tree
*idx
,
1474 find_interesting_uses_op (data
, *idx
);
1475 if (TREE_CODE (base
) == ARRAY_REF
)
1477 find_interesting_uses_op (data
, array_ref_element_size (base
));
1478 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1483 /* Returns true if memory reference REF may be unaligned. */
1486 may_be_unaligned_p (tree ref
)
1490 HOST_WIDE_INT bitsize
;
1491 HOST_WIDE_INT bitpos
;
1493 enum machine_mode mode
;
1494 int unsignedp
, volatilep
;
1495 unsigned base_align
;
1497 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498 thus they are not misaligned. */
1499 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1502 /* The test below is basically copy of what expr.c:normal_inner_ref
1503 does to check whether the object must be loaded by parts when
1504 STRICT_ALIGNMENT is true. */
1505 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1506 &unsignedp
, &volatilep
, true);
1507 base_type
= TREE_TYPE (base
);
1508 base_align
= TYPE_ALIGN (base_type
);
1511 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1512 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1513 || bitpos
% BITS_PER_UNIT
!= 0))
1519 /* Finds addresses in *OP_P inside STMT. */
1522 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1524 tree base
= *op_p
, step
= NULL
;
1526 struct ifs_ivopts_data ifs_ivopts_data
;
1528 /* Do not play with volatile memory references. A bit too conservative,
1529 perhaps, but safe. */
1530 if (stmt_ann (stmt
)->has_volatile_ops
)
1533 /* Ignore bitfields for now. Not really something terribly complicated
1535 if (TREE_CODE (base
) == COMPONENT_REF
1536 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1539 if (STRICT_ALIGNMENT
1540 && may_be_unaligned_p (base
))
1543 base
= unshare_expr (base
);
1545 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1547 tree type
= build_pointer_type (TREE_TYPE (base
));
1551 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1553 civ
= get_iv (data
, TMR_BASE (base
));
1557 TMR_BASE (base
) = civ
->base
;
1560 if (TMR_INDEX (base
)
1561 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1563 civ
= get_iv (data
, TMR_INDEX (base
));
1567 TMR_INDEX (base
) = civ
->base
;
1572 if (TMR_STEP (base
))
1573 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1576 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1584 base
= tree_mem_ref_addr (type
, base
);
1588 ifs_ivopts_data
.ivopts_data
= data
;
1589 ifs_ivopts_data
.stmt
= stmt
;
1590 ifs_ivopts_data
.step_p
= &step
;
1591 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1595 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1596 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1598 base
= build_fold_addr_expr (base
);
1601 civ
= alloc_iv (base
, step
);
1602 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1606 for_each_index (op_p
, idx_record_use
, data
);
1609 /* Finds and records invariants used in STMT. */
1612 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1615 use_operand_p use_p
;
1618 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1620 op
= USE_FROM_PTR (use_p
);
1621 record_invariant (data
, op
, false);
1625 /* Finds interesting uses of induction variables in the statement STMT. */
1628 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1633 use_operand_p use_p
;
1635 find_invariants_stmt (data
, stmt
);
1637 if (TREE_CODE (stmt
) == COND_EXPR
)
1639 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1643 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1645 lhs
= TREE_OPERAND (stmt
, 0);
1646 rhs
= TREE_OPERAND (stmt
, 1);
1648 if (TREE_CODE (lhs
) == SSA_NAME
)
1650 /* If the statement defines an induction variable, the uses are not
1651 interesting by themselves. */
1653 iv
= get_iv (data
, lhs
);
1655 if (iv
&& !zero_p (iv
->step
))
1659 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1661 case tcc_comparison
:
1662 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1666 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1667 if (REFERENCE_CLASS_P (lhs
))
1668 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1674 if (REFERENCE_CLASS_P (lhs
)
1675 && is_gimple_val (rhs
))
1677 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1678 find_interesting_uses_op (data
, rhs
);
1682 /* TODO -- we should also handle address uses of type
1684 memory = call (whatever);
1691 if (TREE_CODE (stmt
) == PHI_NODE
1692 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1694 lhs
= PHI_RESULT (stmt
);
1695 iv
= get_iv (data
, lhs
);
1697 if (iv
&& !zero_p (iv
->step
))
1701 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1703 op
= USE_FROM_PTR (use_p
);
1705 if (TREE_CODE (op
) != SSA_NAME
)
1708 iv
= get_iv (data
, op
);
1712 find_interesting_uses_op (data
, op
);
1716 /* Finds interesting uses of induction variables outside of loops
1717 on loop exit edge EXIT. */
1720 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1724 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1726 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1727 find_interesting_uses_outer (data
, def
);
1731 /* Finds uses of the induction variables that are interesting. */
1734 find_interesting_uses (struct ivopts_data
*data
)
1737 block_stmt_iterator bsi
;
1739 basic_block
*body
= get_loop_body (data
->current_loop
);
1741 struct version_info
*info
;
1744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1745 fprintf (dump_file
, "Uses:\n\n");
1747 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1752 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1753 if (e
->dest
!= EXIT_BLOCK_PTR
1754 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1755 find_interesting_uses_outside (data
, e
);
1757 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1758 find_interesting_uses_stmt (data
, phi
);
1759 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1760 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1763 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1767 fprintf (dump_file
, "\n");
1769 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1771 info
= ver_info (data
, i
);
1774 fprintf (dump_file
, " ");
1775 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1776 fprintf (dump_file
, " is invariant (%d)%s\n",
1777 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1781 fprintf (dump_file
, "\n");
1787 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1788 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1789 we are at the top-level of the processed address. */
1792 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1793 unsigned HOST_WIDE_INT
*offset
)
1795 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1796 enum tree_code code
;
1797 tree type
, orig_type
= TREE_TYPE (expr
);
1798 unsigned HOST_WIDE_INT off0
, off1
, st
;
1799 tree orig_expr
= expr
;
1803 type
= TREE_TYPE (expr
);
1804 code
= TREE_CODE (expr
);
1810 if (!cst_and_fits_in_hwi (expr
)
1814 *offset
= int_cst_value (expr
);
1815 return build_int_cst_type (orig_type
, 0);
1819 op0
= TREE_OPERAND (expr
, 0);
1820 op1
= TREE_OPERAND (expr
, 1);
1822 op0
= strip_offset_1 (op0
, false, false, &off0
);
1823 op1
= strip_offset_1 (op1
, false, false, &off1
);
1825 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1826 if (op0
== TREE_OPERAND (expr
, 0)
1827 && op1
== TREE_OPERAND (expr
, 1))
1832 else if (zero_p (op0
))
1834 if (code
== PLUS_EXPR
)
1837 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1840 expr
= fold_build2 (code
, type
, op0
, op1
);
1842 return fold_convert (orig_type
, expr
);
1848 step
= array_ref_element_size (expr
);
1849 if (!cst_and_fits_in_hwi (step
))
1852 st
= int_cst_value (step
);
1853 op1
= TREE_OPERAND (expr
, 1);
1854 op1
= strip_offset_1 (op1
, false, false, &off1
);
1855 *offset
= off1
* st
;
1860 /* Strip the component reference completely. */
1861 op0
= TREE_OPERAND (expr
, 0);
1862 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1872 tmp
= component_ref_field_offset (expr
);
1874 && cst_and_fits_in_hwi (tmp
))
1876 /* Strip the component reference completely. */
1877 op0
= TREE_OPERAND (expr
, 0);
1878 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1879 *offset
= off0
+ int_cst_value (tmp
);
1885 op0
= TREE_OPERAND (expr
, 0);
1886 op0
= strip_offset_1 (op0
, true, true, &off0
);
1889 if (op0
== TREE_OPERAND (expr
, 0))
1892 expr
= build_fold_addr_expr (op0
);
1893 return fold_convert (orig_type
, expr
);
1896 inside_addr
= false;
1903 /* Default handling of expressions for that we want to recurse into
1904 the first operand. */
1905 op0
= TREE_OPERAND (expr
, 0);
1906 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1909 if (op0
== TREE_OPERAND (expr
, 0)
1910 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1913 expr
= copy_node (expr
);
1914 TREE_OPERAND (expr
, 0) = op0
;
1916 TREE_OPERAND (expr
, 1) = op1
;
1918 /* Inside address, we might strip the top level component references,
1919 thus changing type of the expression. Handling of ADDR_EXPR
1921 expr
= fold_convert (orig_type
, expr
);
1926 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1929 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1931 return strip_offset_1 (expr
, false, false, offset
);
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935 For integer types, we return unsigned variant of the type, which
1936 avoids problems with overflows. For pointer types, we return void *. */
1939 generic_type_for (tree type
)
1941 if (POINTER_TYPE_P (type
))
1942 return ptr_type_node
;
1944 if (TYPE_UNSIGNED (type
))
1947 return unsigned_type_for (type
);
1950 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1951 the bitmap to that we should store it. */
1953 static struct ivopts_data
*fd_ivopts_data
;
1955 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1957 bitmap
*depends_on
= data
;
1958 struct version_info
*info
;
1960 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1962 info
= name_info (fd_ivopts_data
, *expr_p
);
1964 if (!info
->inv_id
|| info
->has_nonlin_use
)
1968 *depends_on
= BITMAP_ALLOC (NULL
);
1969 bitmap_set_bit (*depends_on
, info
->inv_id
);
1974 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1975 position to POS. If USE is not NULL, the candidate is set as related to
1976 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1977 replacement of the final value of the iv by a direct computation. */
1979 static struct iv_cand
*
1980 add_candidate_1 (struct ivopts_data
*data
,
1981 tree base
, tree step
, bool important
, enum iv_position pos
,
1982 struct iv_use
*use
, tree incremented_at
)
1985 struct iv_cand
*cand
= NULL
;
1986 tree type
, orig_type
;
1990 orig_type
= TREE_TYPE (base
);
1991 type
= generic_type_for (orig_type
);
1992 if (type
!= orig_type
)
1994 base
= fold_convert (type
, base
);
1996 step
= fold_convert (type
, step
);
2000 for (i
= 0; i
< n_iv_cands (data
); i
++)
2002 cand
= iv_cand (data
, i
);
2004 if (cand
->pos
!= pos
)
2007 if (cand
->incremented_at
!= incremented_at
)
2021 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
2024 if (zero_p (cand
->iv
->step
))
2031 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
2036 if (i
== n_iv_cands (data
))
2038 cand
= xcalloc (1, sizeof (struct iv_cand
));
2044 cand
->iv
= alloc_iv (base
, step
);
2047 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2049 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2050 cand
->var_after
= cand
->var_before
;
2052 cand
->important
= important
;
2053 cand
->incremented_at
= incremented_at
;
2054 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2057 && TREE_CODE (step
) != INTEGER_CST
)
2059 fd_ivopts_data
= data
;
2060 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2064 dump_cand (dump_file
, cand
);
2067 if (important
&& !cand
->important
)
2069 cand
->important
= true;
2070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2071 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2076 bitmap_set_bit (use
->related_cands
, i
);
2077 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2078 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2088 The purpose is to avoid splitting latch edge with a biv increment, thus
2089 creating a jump, possibly confusing other optimization passes and leaving
2090 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2091 is not available (so we do not have a better alternative), or if the latch
2092 edge is already nonempty. */
2095 allow_ip_end_pos_p (struct loop
*loop
)
2097 if (!ip_normal_pos (loop
))
2100 if (!empty_block_p (ip_end_pos (loop
)))
2106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2107 position to POS. If USE is not NULL, the candidate is set as related to
2108 it. The candidate computation is scheduled on all available positions. */
2111 add_candidate (struct ivopts_data
*data
,
2112 tree base
, tree step
, bool important
, struct iv_use
*use
)
2114 if (ip_normal_pos (data
->current_loop
))
2115 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2116 if (ip_end_pos (data
->current_loop
)
2117 && allow_ip_end_pos_p (data
->current_loop
))
2118 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122 type with SIZE bits. */
2125 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2128 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2129 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2133 /* Adds standard iv candidates. */
2136 add_standard_iv_candidates (struct ivopts_data
*data
)
2138 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2140 /* The same for a double-integer type if it is still fast enough. */
2141 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2142 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2146 /* Adds candidates bases on the old induction variable IV. */
2149 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2152 struct iv_cand
*cand
;
2154 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2156 /* The same, but with initial value zero. */
2157 add_candidate (data
,
2158 build_int_cst (TREE_TYPE (iv
->base
), 0),
2159 iv
->step
, true, NULL
);
2161 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2162 if (TREE_CODE (phi
) == PHI_NODE
)
2164 /* Additionally record the possibility of leaving the original iv
2166 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2167 cand
= add_candidate_1 (data
,
2168 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2169 SSA_NAME_DEF_STMT (def
));
2170 cand
->var_before
= iv
->ssa_name
;
2171 cand
->var_after
= def
;
2175 /* Adds candidates based on the old induction variables. */
2178 add_old_ivs_candidates (struct ivopts_data
*data
)
2184 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2186 iv
= ver_info (data
, i
)->iv
;
2187 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2188 add_old_iv_candidates (data
, iv
);
2192 /* Adds candidates based on the value of the induction variable IV and USE. */
2195 add_iv_value_candidates (struct ivopts_data
*data
,
2196 struct iv
*iv
, struct iv_use
*use
)
2198 unsigned HOST_WIDE_INT offset
;
2201 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2203 /* The same, but with initial value zero. Make such variable important,
2204 since it is generic enough so that possibly many uses may be based
2206 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2207 iv
->step
, true, use
);
2209 /* Third, try removing the constant offset. */
2210 base
= strip_offset (iv
->base
, &offset
);
2212 add_candidate (data
, base
, iv
->step
, false, use
);
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216 a direct computation. */
2219 add_iv_outer_candidates (struct ivopts_data
*data
, struct iv_use
*use
)
2221 struct tree_niter_desc
*niter
;
2223 /* We must know where we exit the loop and how many times does it roll. */
2224 niter
= niter_for_single_dom_exit (data
);
2226 || !zero_p (niter
->may_be_zero
))
2229 add_candidate_1 (data
, NULL
, NULL
, false, IP_NORMAL
, use
, NULL_TREE
);
2232 /* Adds candidates based on the uses. */
2235 add_derived_ivs_candidates (struct ivopts_data
*data
)
2239 for (i
= 0; i
< n_iv_uses (data
); i
++)
2241 struct iv_use
*use
= iv_use (data
, i
);
2248 case USE_NONLINEAR_EXPR
:
2251 /* Just add the ivs based on the value of the iv used here. */
2252 add_iv_value_candidates (data
, use
->iv
, use
);
2256 add_iv_value_candidates (data
, use
->iv
, use
);
2258 /* Additionally, add the pseudocandidate for the possibility to
2259 replace the final value by a direct computation. */
2260 add_iv_outer_candidates (data
, use
);
2269 /* Record important candidates and add them to related_cands bitmaps
2273 record_important_candidates (struct ivopts_data
*data
)
2278 for (i
= 0; i
< n_iv_cands (data
); i
++)
2280 struct iv_cand
*cand
= iv_cand (data
, i
);
2282 if (cand
->important
)
2283 bitmap_set_bit (data
->important_candidates
, i
);
2286 data
->consider_all_candidates
= (n_iv_cands (data
)
2287 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2289 if (data
->consider_all_candidates
)
2291 /* We will not need "related_cands" bitmaps in this case,
2292 so release them to decrease peak memory consumption. */
2293 for (i
= 0; i
< n_iv_uses (data
); i
++)
2295 use
= iv_use (data
, i
);
2296 BITMAP_FREE (use
->related_cands
);
2301 /* Add important candidates to the related_cands bitmaps. */
2302 for (i
= 0; i
< n_iv_uses (data
); i
++)
2303 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2304 data
->important_candidates
);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328 we allocate a simple list to every use. */
2331 alloc_use_cost_map (struct ivopts_data
*data
)
2333 unsigned i
, size
, s
, j
;
2335 for (i
= 0; i
< n_iv_uses (data
); i
++)
2337 struct iv_use
*use
= iv_use (data
, i
);
2340 if (data
->consider_all_candidates
)
2341 size
= n_iv_cands (data
);
2345 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2350 /* Round up to the power of two, so that moduling by it is fast. */
2351 for (size
= 1; size
< s
; size
<<= 1)
2355 use
->n_map_members
= size
;
2356 use
->cost_map
= xcalloc (size
, sizeof (struct cost_pair
));
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361 on invariants DEPENDS_ON and that the value used in expressing it
2365 set_use_iv_cost (struct ivopts_data
*data
,
2366 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2367 bitmap depends_on
, tree value
)
2373 BITMAP_FREE (depends_on
);
2377 if (data
->consider_all_candidates
)
2379 use
->cost_map
[cand
->id
].cand
= cand
;
2380 use
->cost_map
[cand
->id
].cost
= cost
;
2381 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2382 use
->cost_map
[cand
->id
].value
= value
;
2386 /* n_map_members is a power of two, so this computes modulo. */
2387 s
= cand
->id
& (use
->n_map_members
- 1);
2388 for (i
= s
; i
< use
->n_map_members
; i
++)
2389 if (!use
->cost_map
[i
].cand
)
2391 for (i
= 0; i
< s
; i
++)
2392 if (!use
->cost_map
[i
].cand
)
2398 use
->cost_map
[i
].cand
= cand
;
2399 use
->cost_map
[i
].cost
= cost
;
2400 use
->cost_map
[i
].depends_on
= depends_on
;
2401 use
->cost_map
[i
].value
= value
;
2404 /* Gets cost of (USE, CANDIDATE) pair. */
2406 static struct cost_pair
*
2407 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2408 struct iv_cand
*cand
)
2411 struct cost_pair
*ret
;
2416 if (data
->consider_all_candidates
)
2418 ret
= use
->cost_map
+ cand
->id
;
2425 /* n_map_members is a power of two, so this computes modulo. */
2426 s
= cand
->id
& (use
->n_map_members
- 1);
2427 for (i
= s
; i
< use
->n_map_members
; i
++)
2428 if (use
->cost_map
[i
].cand
== cand
)
2429 return use
->cost_map
+ i
;
2431 for (i
= 0; i
< s
; i
++)
2432 if (use
->cost_map
[i
].cand
== cand
)
2433 return use
->cost_map
+ i
;
2438 /* Returns estimate on cost of computing SEQ. */
2446 for (; seq
; seq
= NEXT_INSN (seq
))
2448 set
= single_set (seq
);
2450 cost
+= rtx_cost (set
, SET
);
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2460 produce_memory_decl_rtl (tree obj
, int *regno
)
2465 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2467 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2468 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2471 x
= gen_raw_REG (Pmode
, (*regno
)++);
2473 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2476 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2477 walk_tree. DATA contains the actual fake register number. */
2480 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2482 tree obj
= NULL_TREE
;
2486 switch (TREE_CODE (*expr_p
))
2489 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2490 handled_component_p (*expr_p
);
2491 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2495 x
= produce_memory_decl_rtl (obj
, regno
);
2500 obj
= SSA_NAME_VAR (*expr_p
);
2501 if (!DECL_RTL_SET_P (obj
))
2502 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2511 if (DECL_RTL_SET_P (obj
))
2514 if (DECL_MODE (obj
) == BLKmode
)
2515 x
= produce_memory_decl_rtl (obj
, regno
);
2517 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2527 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2528 SET_DECL_RTL (obj
, x
);
2534 /* Determines cost of the computation of EXPR. */
2537 computation_cost (tree expr
)
2540 tree type
= TREE_TYPE (expr
);
2542 /* Avoid using hard regs in ways which may be unsupported. */
2543 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2545 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2547 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2551 cost
= seq_cost (seq
);
2553 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2558 /* Returns variable containing the value of candidate CAND at statement AT. */
2561 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2563 if (stmt_after_increment (loop
, cand
, stmt
))
2564 return cand
->var_after
;
2566 return cand
->var_before
;
2569 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2570 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2573 tree_int_cst_sign_bit (tree t
)
2575 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2576 unsigned HOST_WIDE_INT w
;
2578 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2579 w
= TREE_INT_CST_LOW (t
);
2582 w
= TREE_INT_CST_HIGH (t
);
2583 bitno
-= HOST_BITS_PER_WIDE_INT
;
2586 return (w
>> bitno
) & 1;
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590 return cst. Otherwise return NULL_TREE. */
2593 constant_multiple_of (tree type
, tree top
, tree bot
)
2595 tree res
, mby
, p0
, p1
;
2596 enum tree_code code
;
2602 if (operand_equal_p (top
, bot
, 0))
2603 return build_int_cst (type
, 1);
2605 code
= TREE_CODE (top
);
2609 mby
= TREE_OPERAND (top
, 1);
2610 if (TREE_CODE (mby
) != INTEGER_CST
)
2613 res
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2617 return fold_binary_to_constant (MULT_EXPR
, type
, res
,
2618 fold_convert (type
, mby
));
2622 p0
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2625 p1
= constant_multiple_of (type
, TREE_OPERAND (top
, 1), bot
);
2629 return fold_binary_to_constant (code
, type
, p0
, p1
);
2632 if (TREE_CODE (bot
) != INTEGER_CST
)
2635 bot
= fold_convert (type
, bot
);
2636 top
= fold_convert (type
, top
);
2638 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639 the result afterwards. */
2640 if (tree_int_cst_sign_bit (bot
))
2643 bot
= fold_unary_to_constant (NEGATE_EXPR
, type
, bot
);
2648 /* Ditto for TOP. */
2649 if (tree_int_cst_sign_bit (top
))
2652 top
= fold_unary_to_constant (NEGATE_EXPR
, type
, top
);
2655 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR
, type
, top
, bot
)))
2658 res
= fold_binary_to_constant (EXACT_DIV_EXPR
, type
, top
, bot
);
2660 res
= fold_unary_to_constant (NEGATE_EXPR
, type
, res
);
2668 /* Sets COMB to CST. */
2671 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2672 unsigned HOST_WIDE_INT cst
)
2674 unsigned prec
= TYPE_PRECISION (type
);
2677 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2680 comb
->rest
= NULL_TREE
;
2681 comb
->offset
= cst
& comb
->mask
;
2684 /* Sets COMB to single element ELT. */
2687 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2689 unsigned prec
= TYPE_PRECISION (type
);
2692 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2695 comb
->elts
[0] = elt
;
2697 comb
->rest
= NULL_TREE
;
2701 /* Scales COMB by SCALE. */
2704 aff_combination_scale (struct affine_tree_combination
*comb
,
2705 unsigned HOST_WIDE_INT scale
)
2714 aff_combination_const (comb
, comb
->type
, 0);
2718 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2719 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2721 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2722 comb
->elts
[j
] = comb
->elts
[i
];
2723 if (comb
->coefs
[j
] != 0)
2730 if (comb
->n
< MAX_AFF_ELTS
)
2732 comb
->coefs
[comb
->n
] = scale
;
2733 comb
->elts
[comb
->n
] = comb
->rest
;
2734 comb
->rest
= NULL_TREE
;
2738 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2739 build_int_cst_type (comb
->type
, scale
));
2743 /* Adds ELT * SCALE to COMB. */
2746 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2747 unsigned HOST_WIDE_INT scale
)
2754 for (i
= 0; i
< comb
->n
; i
++)
2755 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2757 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2762 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2763 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2766 if (comb
->n
< MAX_AFF_ELTS
)
2768 comb
->coefs
[comb
->n
] = scale
;
2769 comb
->elts
[comb
->n
] = elt
;
2775 elt
= fold_convert (comb
->type
, elt
);
2777 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2778 fold_convert (comb
->type
, elt
),
2779 build_int_cst_type (comb
->type
, scale
));
2782 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2787 /* Adds COMB2 to COMB1. */
2790 aff_combination_add (struct affine_tree_combination
*comb1
,
2791 struct affine_tree_combination
*comb2
)
2795 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2796 for (i
= 0; i
< comb2
-> n
; i
++)
2797 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2799 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2802 /* Splits EXPR into an affine combination of parts. */
2805 tree_to_aff_combination (tree expr
, tree type
,
2806 struct affine_tree_combination
*comb
)
2808 struct affine_tree_combination tmp
;
2809 enum tree_code code
;
2810 tree cst
, core
, toffset
;
2811 HOST_WIDE_INT bitpos
, bitsize
;
2812 enum machine_mode mode
;
2813 int unsignedp
, volatilep
;
2817 code
= TREE_CODE (expr
);
2821 aff_combination_const (comb
, type
, int_cst_value (expr
));
2826 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2827 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2828 if (code
== MINUS_EXPR
)
2829 aff_combination_scale (&tmp
, -1);
2830 aff_combination_add (comb
, &tmp
);
2834 cst
= TREE_OPERAND (expr
, 1);
2835 if (TREE_CODE (cst
) != INTEGER_CST
)
2837 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2838 aff_combination_scale (comb
, int_cst_value (cst
));
2842 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2843 aff_combination_scale (comb
, -1);
2847 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2848 &toffset
, &mode
, &unsignedp
, &volatilep
,
2850 if (bitpos
% BITS_PER_UNIT
!= 0)
2852 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2853 core
= build_fold_addr_expr (core
);
2854 if (TREE_CODE (core
) == ADDR_EXPR
)
2855 aff_combination_add_elt (comb
, core
, 1);
2858 tree_to_aff_combination (core
, type
, &tmp
);
2859 aff_combination_add (comb
, &tmp
);
2863 tree_to_aff_combination (toffset
, type
, &tmp
);
2864 aff_combination_add (comb
, &tmp
);
2872 aff_combination_elt (comb
, type
, expr
);
2875 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2878 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2879 unsigned HOST_WIDE_INT mask
)
2881 enum tree_code code
;
2884 elt
= fold_convert (type
, elt
);
2891 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2897 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2899 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2903 return fold_build2 (MULT_EXPR
, type
, elt
,
2904 build_int_cst_type (type
, scale
));
2906 if ((scale
| (mask
>> 1)) == mask
)
2908 /* Scale is negative. */
2910 scale
= (-scale
) & mask
;
2915 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2916 build_int_cst_type (type
, scale
));
2917 return fold_build2 (code
, type
, expr
, elt
);
2920 /* Copies the tree elements of COMB to ensure that they are not shared. */
2923 unshare_aff_combination (struct affine_tree_combination
*comb
)
2927 for (i
= 0; i
< comb
->n
; i
++)
2928 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2930 comb
->rest
= unshare_expr (comb
->rest
);
2933 /* Makes tree from the affine combination COMB. */
2936 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2938 tree type
= comb
->type
;
2939 tree expr
= comb
->rest
;
2941 unsigned HOST_WIDE_INT off
, sgn
;
2943 /* Handle the special case produced by get_computation_aff when
2944 the type does not fit in HOST_WIDE_INT. */
2945 if (comb
->n
== 0 && comb
->offset
== 0)
2946 return fold_convert (type
, expr
);
2948 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2950 for (i
= 0; i
< comb
->n
; i
++)
2951 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2954 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2956 /* Offset is negative. */
2957 off
= (-comb
->offset
) & comb
->mask
;
2965 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2969 /* Determines the expression by that USE is expressed from induction variable
2970 CAND at statement AT in LOOP. The expression is stored in a decomposed
2971 form into AFF. Returns false if USE cannot be expressed using CAND. */
2974 get_computation_aff (struct loop
*loop
,
2975 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2976 struct affine_tree_combination
*aff
)
2978 tree ubase
= use
->iv
->base
;
2979 tree ustep
= use
->iv
->step
;
2980 tree cbase
= cand
->iv
->base
;
2981 tree cstep
= cand
->iv
->step
;
2982 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2986 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2987 HOST_WIDE_INT ratioi
;
2988 struct affine_tree_combination cbase_aff
, expr_aff
;
2989 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
2991 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2993 /* We do not have a precision to express the values of use. */
2997 expr
= var_at_stmt (loop
, cand
, at
);
2999 if (TREE_TYPE (expr
) != ctype
)
3001 /* This may happen with the original ivs. */
3002 expr
= fold_convert (ctype
, expr
);
3005 if (TYPE_UNSIGNED (utype
))
3009 uutype
= unsigned_type_for (utype
);
3010 ubase
= fold_convert (uutype
, ubase
);
3011 ustep
= fold_convert (uutype
, ustep
);
3014 if (uutype
!= ctype
)
3016 expr
= fold_convert (uutype
, expr
);
3017 cbase
= fold_convert (uutype
, cbase
);
3018 cstep
= fold_convert (uutype
, cstep
);
3020 /* If the conversion is not noop, we must take it into account when
3021 considering the value of the step. */
3022 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3026 if (cst_and_fits_in_hwi (cstep_orig
)
3027 && cst_and_fits_in_hwi (ustep_orig
))
3029 ustepi
= int_cst_value (ustep_orig
);
3030 cstepi
= int_cst_value (cstep_orig
);
3032 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
3034 /* TODO maybe consider case when ustep divides cstep and the ratio is
3035 a power of 2 (so that the division is fast to execute)? We would
3036 need to be much more careful with overflows etc. then. */
3040 ratio
= build_int_cst_type (uutype
, ratioi
);
3044 ratio
= constant_multiple_of (uutype
, ustep_orig
, cstep_orig
);
3048 /* Ratioi is only used to detect special cases when the multiplicative
3049 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3050 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3051 to integer_onep/integer_all_onesp, since the former ignores
3053 if (cst_and_fits_in_hwi (ratio
))
3054 ratioi
= int_cst_value (ratio
);
3055 else if (integer_onep (ratio
))
3057 else if (integer_all_onesp (ratio
))
3063 /* We may need to shift the value if we are after the increment. */
3064 if (stmt_after_increment (loop
, cand
, at
))
3065 cbase
= fold_build2 (PLUS_EXPR
, uutype
, cbase
, cstep
);
3067 /* use = ubase - ratio * cbase + ratio * var.
3069 In general case ubase + ratio * (var - cbase) could be better (one less
3070 multiplication), but often it is possible to eliminate redundant parts
3071 of computations from (ubase - ratio * cbase) term, and if it does not
3072 happen, fold is able to apply the distributive law to obtain this form
3075 if (TYPE_PRECISION (uutype
) > HOST_BITS_PER_WIDE_INT
)
3077 /* Let's compute in trees and just return the result in AFF. This case
3078 should not be very common, and fold itself is not that bad either,
3079 so making the aff. functions more complicated to handle this case
3080 is not that urgent. */
3083 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, cbase
);
3084 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3086 else if (ratioi
== -1)
3088 delta
= fold_build2 (PLUS_EXPR
, uutype
, ubase
, cbase
);
3089 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3093 delta
= fold_build2 (MULT_EXPR
, uutype
, cbase
, ratio
);
3094 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, delta
);
3095 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3096 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3107 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3108 possible to compute ratioi. */
3109 gcc_assert (ratioi
);
3111 tree_to_aff_combination (ubase
, uutype
, aff
);
3112 tree_to_aff_combination (cbase
, uutype
, &cbase_aff
);
3113 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3114 aff_combination_scale (&cbase_aff
, -ratioi
);
3115 aff_combination_scale (&expr_aff
, ratioi
);
3116 aff_combination_add (aff
, &cbase_aff
);
3117 aff_combination_add (aff
, &expr_aff
);
3122 /* Determines the expression by that USE is expressed from induction variable
3123 CAND at statement AT in LOOP. The computation is unshared. */
3126 get_computation_at (struct loop
*loop
,
3127 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3129 struct affine_tree_combination aff
;
3130 tree type
= TREE_TYPE (use
->iv
->base
);
3132 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3134 unshare_aff_combination (&aff
);
3135 return fold_convert (type
, aff_combination_to_tree (&aff
));
3138 /* Determines the expression by that USE is expressed from induction variable
3139 CAND in LOOP. The computation is unshared. */
3142 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3144 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3147 /* Returns cost of addition in MODE. */
3150 add_cost (enum machine_mode mode
)
3152 static unsigned costs
[NUM_MACHINE_MODES
];
3160 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3161 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3162 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3167 cost
= seq_cost (seq
);
3173 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3174 fprintf (dump_file
, "Addition in %s costs %d\n",
3175 GET_MODE_NAME (mode
), cost
);
3179 /* Entry in a hashtable of already known costs for multiplication. */
3182 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3183 enum machine_mode mode
; /* In mode. */
3184 unsigned cost
; /* The cost. */
3187 /* Counts hash value for the ENTRY. */
3190 mbc_entry_hash (const void *entry
)
3192 const struct mbc_entry
*e
= entry
;
3194 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3197 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3200 mbc_entry_eq (const void *entry1
, const void *entry2
)
3202 const struct mbc_entry
*e1
= entry1
;
3203 const struct mbc_entry
*e2
= entry2
;
3205 return (e1
->mode
== e2
->mode
3206 && e1
->cst
== e2
->cst
);
3209 /* Returns cost of multiplication by constant CST in MODE. */
3212 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3214 static htab_t costs
;
3215 struct mbc_entry
**cached
, act
;
3220 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3224 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3226 return (*cached
)->cost
;
3228 *cached
= xmalloc (sizeof (struct mbc_entry
));
3229 (*cached
)->mode
= mode
;
3230 (*cached
)->cst
= cst
;
3233 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3234 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3238 cost
= seq_cost (seq
);
3240 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3241 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3242 (int) cst
, GET_MODE_NAME (mode
), cost
);
3244 (*cached
)->cost
= cost
;
3249 /* Returns true if multiplying by RATIO is allowed in address. */
3252 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3254 #define MAX_RATIO 128
3255 static sbitmap valid_mult
;
3259 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3263 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3264 sbitmap_zero (valid_mult
);
3265 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3266 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3268 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3269 if (memory_address_p (Pmode
, addr
))
3270 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3273 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3275 fprintf (dump_file
, " allowed multipliers:");
3276 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3277 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3278 fprintf (dump_file
, " %d", (int) i
);
3279 fprintf (dump_file
, "\n");
3280 fprintf (dump_file
, "\n");
3284 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3287 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3290 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3291 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3292 variable is omitted. The created memory accesses MODE.
3294 TODO -- there must be some better way. This all is quite crude. */
3297 get_address_cost (bool symbol_present
, bool var_present
,
3298 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3300 static bool initialized
= false;
3301 static HOST_WIDE_INT rat
, off
;
3302 static HOST_WIDE_INT min_offset
, max_offset
;
3303 static unsigned costs
[2][2][2][2];
3304 unsigned cost
, acost
;
3305 rtx seq
, addr
, base
;
3306 bool offset_p
, ratio_p
;
3308 HOST_WIDE_INT s_offset
;
3309 unsigned HOST_WIDE_INT mask
;
3317 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3319 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3320 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3322 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3323 if (!memory_address_p (Pmode
, addr
))
3326 max_offset
= i
>> 1;
3329 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3331 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3332 if (!memory_address_p (Pmode
, addr
))
3335 min_offset
= -(i
>> 1);
3337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3339 fprintf (dump_file
, "get_address_cost:\n");
3340 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3341 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
3345 for (i
= 2; i
<= MAX_RATIO
; i
++)
3346 if (multiplier_allowed_in_address_p (i
))
3353 bits
= GET_MODE_BITSIZE (Pmode
);
3354 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3356 if ((offset
>> (bits
- 1) & 1))
3361 offset_p
= (s_offset
!= 0
3362 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3363 ratio_p
= (ratio
!= 1
3364 && multiplier_allowed_in_address_p (ratio
));
3366 if (ratio
!= 1 && !ratio_p
)
3367 cost
+= multiply_by_cost (ratio
, Pmode
);
3369 if (s_offset
&& !offset_p
&& !symbol_present
)
3371 cost
+= add_cost (Pmode
);
3375 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3380 addr
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3381 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3383 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3386 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3390 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3392 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3393 gen_rtx_fmt_ee (PLUS
, Pmode
,
3395 gen_int_mode (off
, Pmode
)));
3398 base
= gen_int_mode (off
, Pmode
);
3403 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3406 addr
= memory_address (Pmode
, addr
);
3410 acost
= seq_cost (seq
);
3411 acost
+= address_cost (addr
, Pmode
);
3415 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
3418 return cost
+ acost
;
3421 /* Estimates cost of forcing expression EXPR into a variable. */
3424 force_expr_to_var_cost (tree expr
)
3426 static bool costs_initialized
= false;
3427 static unsigned integer_cost
;
3428 static unsigned symbol_cost
;
3429 static unsigned address_cost
;
3431 unsigned cost0
, cost1
, cost
;
3432 enum machine_mode mode
;
3434 if (!costs_initialized
)
3436 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3437 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3438 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3440 tree type
= build_pointer_type (integer_type_node
);
3442 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
3445 SET_DECL_RTL (var
, x
);
3446 TREE_STATIC (var
) = 1;
3447 addr
= build1 (ADDR_EXPR
, type
, var
);
3448 symbol_cost
= computation_cost (addr
) + 1;
3451 = computation_cost (build2 (PLUS_EXPR
, type
,
3453 build_int_cst_type (type
, 2000))) + 1;
3454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3456 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3457 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3458 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3459 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3460 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3461 fprintf (dump_file
, "\n");
3464 costs_initialized
= true;
3469 if (SSA_VAR_P (expr
))
3472 if (TREE_INVARIANT (expr
))
3474 if (TREE_CODE (expr
) == INTEGER_CST
)
3475 return integer_cost
;
3477 if (TREE_CODE (expr
) == ADDR_EXPR
)
3479 tree obj
= TREE_OPERAND (expr
, 0);
3481 if (TREE_CODE (obj
) == VAR_DECL
3482 || TREE_CODE (obj
) == PARM_DECL
3483 || TREE_CODE (obj
) == RESULT_DECL
)
3487 return address_cost
;
3490 switch (TREE_CODE (expr
))
3495 op0
= TREE_OPERAND (expr
, 0);
3496 op1
= TREE_OPERAND (expr
, 1);
3500 if (is_gimple_val (op0
))
3503 cost0
= force_expr_to_var_cost (op0
);
3505 if (is_gimple_val (op1
))
3508 cost1
= force_expr_to_var_cost (op1
);
3513 /* Just an arbitrary value, FIXME. */
3514 return target_spill_cost
;
3517 mode
= TYPE_MODE (TREE_TYPE (expr
));
3518 switch (TREE_CODE (expr
))
3522 cost
= add_cost (mode
);
3526 if (cst_and_fits_in_hwi (op0
))
3527 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3528 else if (cst_and_fits_in_hwi (op1
))
3529 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3531 return target_spill_cost
;
3541 /* Bound the cost by target_spill_cost. The parts of complicated
3542 computations often are either loop invariant or at least can
3543 be shared between several iv uses, so letting this grow without
3544 limits would not give reasonable results. */
3545 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3548 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3549 invariants the computation depends on. */
3552 force_var_cost (struct ivopts_data
*data
,
3553 tree expr
, bitmap
*depends_on
)
3557 fd_ivopts_data
= data
;
3558 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3561 return force_expr_to_var_cost (expr
);
3564 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3565 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3566 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3567 invariants the computation depends on. */
3570 split_address_cost (struct ivopts_data
*data
,
3571 tree addr
, bool *symbol_present
, bool *var_present
,
3572 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3575 HOST_WIDE_INT bitsize
;
3576 HOST_WIDE_INT bitpos
;
3578 enum machine_mode mode
;
3579 int unsignedp
, volatilep
;
3581 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3582 &unsignedp
, &volatilep
, false);
3585 || bitpos
% BITS_PER_UNIT
!= 0
3586 || TREE_CODE (core
) != VAR_DECL
)
3588 *symbol_present
= false;
3589 *var_present
= true;
3590 fd_ivopts_data
= data
;
3591 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3592 return target_spill_cost
;
3595 *offset
+= bitpos
/ BITS_PER_UNIT
;
3596 if (TREE_STATIC (core
)
3597 || DECL_EXTERNAL (core
))
3599 *symbol_present
= true;
3600 *var_present
= false;
3604 *symbol_present
= false;
3605 *var_present
= true;
3609 /* Estimates cost of expressing difference of addresses E1 - E2 as
3610 var + symbol + offset. The value of offset is added to OFFSET,
3611 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3612 part is missing. DEPENDS_ON is a set of the invariants the computation
3616 ptr_difference_cost (struct ivopts_data
*data
,
3617 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3618 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3620 HOST_WIDE_INT diff
= 0;
3623 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3625 if (ptr_difference_const (e1
, e2
, &diff
))
3628 *symbol_present
= false;
3629 *var_present
= false;
3633 if (e2
== integer_zero_node
)
3634 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3635 symbol_present
, var_present
, offset
, depends_on
);
3637 *symbol_present
= false;
3638 *var_present
= true;
3640 cost
= force_var_cost (data
, e1
, depends_on
);
3641 cost
+= force_var_cost (data
, e2
, depends_on
);
3642 cost
+= add_cost (Pmode
);
3647 /* Estimates cost of expressing difference E1 - E2 as
3648 var + symbol + offset. The value of offset is added to OFFSET,
3649 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3650 part is missing. DEPENDS_ON is a set of the invariants the computation
3654 difference_cost (struct ivopts_data
*data
,
3655 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3656 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3659 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3660 unsigned HOST_WIDE_INT off1
, off2
;
3662 e1
= strip_offset (e1
, &off1
);
3663 e2
= strip_offset (e2
, &off2
);
3664 *offset
+= off1
- off2
;
3669 if (TREE_CODE (e1
) == ADDR_EXPR
)
3670 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3672 *symbol_present
= false;
3674 if (operand_equal_p (e1
, e2
, 0))
3676 *var_present
= false;
3679 *var_present
= true;
3681 return force_var_cost (data
, e1
, depends_on
);
3685 cost
= force_var_cost (data
, e2
, depends_on
);
3686 cost
+= multiply_by_cost (-1, mode
);
3691 cost
= force_var_cost (data
, e1
, depends_on
);
3692 cost
+= force_var_cost (data
, e2
, depends_on
);
3693 cost
+= add_cost (mode
);
3698 /* Determines the cost of the computation by that USE is expressed
3699 from induction variable CAND. If ADDRESS_P is true, we just need
3700 to create an address from it, otherwise we want to get it into
3701 register. A set of invariants we depend on is stored in
3702 DEPENDS_ON. AT is the statement at that the value is computed. */
3705 get_computation_cost_at (struct ivopts_data
*data
,
3706 struct iv_use
*use
, struct iv_cand
*cand
,
3707 bool address_p
, bitmap
*depends_on
, tree at
)
3709 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3711 tree utype
= TREE_TYPE (ubase
), ctype
;
3712 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3713 HOST_WIDE_INT ratio
, aratio
;
3714 bool var_present
, symbol_present
;
3715 unsigned cost
= 0, n_sums
;
3719 /* Only consider real candidates. */
3723 cbase
= cand
->iv
->base
;
3724 cstep
= cand
->iv
->step
;
3725 ctype
= TREE_TYPE (cbase
);
3727 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3729 /* We do not have a precision to express the values of use. */
3735 /* Do not try to express address of an object with computation based
3736 on address of a different object. This may cause problems in rtl
3737 level alias analysis (that does not expect this to be happening,
3738 as this is illegal in C), and would be unlikely to be useful
3740 if (use
->iv
->base_object
3741 && cand
->iv
->base_object
3742 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3746 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3748 /* TODO -- add direct handling of this case. */
3752 /* CSTEPI is removed from the offset in case statement is after the
3753 increment. If the step is not constant, we use zero instead.
3754 This is a bit imprecise (there is the extra addition), but
3755 redundancy elimination is likely to transform the code so that
3756 it uses value of the variable before increment anyway,
3757 so it is not that much unrealistic. */
3758 if (cst_and_fits_in_hwi (cstep
))
3759 cstepi
= int_cst_value (cstep
);
3763 if (cst_and_fits_in_hwi (ustep
)
3764 && cst_and_fits_in_hwi (cstep
))
3766 ustepi
= int_cst_value (ustep
);
3768 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3775 rat
= constant_multiple_of (utype
, ustep
, cstep
);
3780 if (cst_and_fits_in_hwi (rat
))
3781 ratio
= int_cst_value (rat
);
3782 else if (integer_onep (rat
))
3784 else if (integer_all_onesp (rat
))
3790 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3791 or ratio == 1, it is better to handle this like
3793 ubase - ratio * cbase + ratio * var
3795 (also holds in the case ratio == -1, TODO. */
3797 if (cst_and_fits_in_hwi (cbase
))
3799 offset
= - ratio
* int_cst_value (cbase
);
3800 cost
+= difference_cost (data
,
3801 ubase
, integer_zero_node
,
3802 &symbol_present
, &var_present
, &offset
,
3805 else if (ratio
== 1)
3807 cost
+= difference_cost (data
,
3809 &symbol_present
, &var_present
, &offset
,
3814 cost
+= force_var_cost (data
, cbase
, depends_on
);
3815 cost
+= add_cost (TYPE_MODE (ctype
));
3816 cost
+= difference_cost (data
,
3817 ubase
, integer_zero_node
,
3818 &symbol_present
, &var_present
, &offset
,
3822 /* If we are after the increment, the value of the candidate is higher by
3824 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3825 offset
-= ratio
* cstepi
;
3827 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3828 (symbol/var/const parts may be omitted). If we are looking for an address,
3829 find the cost of addressing this. */
3831 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3833 /* Otherwise estimate the costs for computing the expression. */
3834 aratio
= ratio
> 0 ? ratio
: -ratio
;
3835 if (!symbol_present
&& !var_present
&& !offset
)
3838 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3844 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3848 /* Symbol + offset should be compile-time computable. */
3849 && (symbol_present
|| offset
))
3852 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3856 /* Just get the expression, expand it and measure the cost. */
3857 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3863 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3865 return computation_cost (comp
);
3869 /* Determines the cost of the computation by that USE is expressed
3870 from induction variable CAND. If ADDRESS_P is true, we just need
3871 to create an address from it, otherwise we want to get it into
3872 register. A set of invariants we depend on is stored in
3876 get_computation_cost (struct ivopts_data
*data
,
3877 struct iv_use
*use
, struct iv_cand
*cand
,
3878 bool address_p
, bitmap
*depends_on
)
3880 return get_computation_cost_at (data
,
3881 use
, cand
, address_p
, depends_on
, use
->stmt
);
3884 /* Determines cost of basing replacement of USE on CAND in a generic
3888 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3889 struct iv_use
*use
, struct iv_cand
*cand
)
3894 /* The simple case first -- if we need to express value of the preserved
3895 original biv, the cost is 0. This also prevents us from counting the
3896 cost of increment twice -- once at this use and once in the cost of
3898 if (cand
->pos
== IP_ORIGINAL
3899 && cand
->incremented_at
== use
->stmt
)
3901 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3905 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3906 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3908 return cost
!= INFTY
;
3911 /* Determines cost of basing replacement of USE on CAND in an address. */
3914 determine_use_iv_cost_address (struct ivopts_data
*data
,
3915 struct iv_use
*use
, struct iv_cand
*cand
)
3918 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3920 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3922 return cost
!= INFTY
;
3925 /* Computes value of induction variable IV in iteration NITER. */
3928 iv_value (struct iv
*iv
, tree niter
)
3931 tree type
= TREE_TYPE (iv
->base
);
3933 niter
= fold_convert (type
, niter
);
3934 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
3936 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
3939 /* Computes value of candidate CAND at position AT in iteration NITER. */
3942 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3944 tree val
= iv_value (cand
->iv
, niter
);
3945 tree type
= TREE_TYPE (cand
->iv
->base
);
3947 if (stmt_after_increment (loop
, cand
, at
))
3948 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
3953 /* Returns period of induction variable iv. */
3956 iv_period (struct iv
*iv
)
3958 tree step
= iv
->step
, period
, type
;
3961 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3963 /* Period of the iv is gcd (step, type range). Since type range is power
3964 of two, it suffices to determine the maximum power of two that divides
3966 pow2div
= num_ending_zeros (step
);
3967 type
= unsigned_type_for (TREE_TYPE (step
));
3969 period
= build_low_bits_mask (type
,
3970 (TYPE_PRECISION (type
)
3971 - tree_low_cst (pow2div
, 1)));
3976 /* Returns the comparison operator used when eliminating the iv USE. */
3978 static enum tree_code
3979 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3981 struct loop
*loop
= data
->current_loop
;
3985 ex_bb
= bb_for_stmt (use
->stmt
);
3986 exit
= EDGE_SUCC (ex_bb
, 0);
3987 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3988 exit
= EDGE_SUCC (ex_bb
, 1);
3990 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3993 /* Check whether it is possible to express the condition in USE by comparison
3994 of candidate CAND. If so, store the value compared with to BOUND. */
3997 may_eliminate_iv (struct ivopts_data
*data
,
3998 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4002 struct tree_niter_desc
*niter
;
4004 tree wider_type
, period
, per_type
;
4005 struct loop
*loop
= data
->current_loop
;
4007 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4010 /* For now works only for exits that dominate the loop latch. TODO -- extend
4011 for other conditions inside loop body. */
4012 ex_bb
= bb_for_stmt (use
->stmt
);
4013 if (use
->stmt
!= last_stmt (ex_bb
)
4014 || TREE_CODE (use
->stmt
) != COND_EXPR
)
4016 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4019 exit
= EDGE_SUCC (ex_bb
, 0);
4020 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4021 exit
= EDGE_SUCC (ex_bb
, 1);
4022 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4025 niter
= niter_for_exit (data
, exit
);
4027 || !zero_p (niter
->may_be_zero
))
4031 nit_type
= TREE_TYPE (nit
);
4033 /* Determine whether we may use the variable to test whether niter iterations
4034 elapsed. This is the case iff the period of the induction variable is
4035 greater than the number of iterations. */
4036 period
= iv_period (cand
->iv
);
4039 per_type
= TREE_TYPE (period
);
4041 wider_type
= TREE_TYPE (period
);
4042 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
4043 wider_type
= per_type
;
4045 wider_type
= nit_type
;
4047 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
4048 fold_convert (wider_type
, period
),
4049 fold_convert (wider_type
, nit
))))
4052 *bound
= cand_value_at (loop
, cand
, use
->stmt
, nit
);
4056 /* Determines cost of basing replacement of USE on CAND in a condition. */
4059 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4060 struct iv_use
*use
, struct iv_cand
*cand
)
4062 tree bound
= NULL_TREE
, op
, cond
;
4063 bitmap depends_on
= NULL
;
4066 /* Only consider real candidates. */
4069 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4073 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4075 cost
= force_var_cost (data
, bound
, &depends_on
);
4077 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4078 return cost
!= INFTY
;
4081 /* The induction variable elimination failed; just express the original
4082 giv. If it is compared with an invariant, note that we cannot get
4084 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4087 if (TREE_CODE (cond
) != SSA_NAME
)
4089 op
= TREE_OPERAND (cond
, 0);
4090 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4091 op
= TREE_OPERAND (cond
, 1);
4092 if (TREE_CODE (op
) == SSA_NAME
)
4094 op
= get_iv (data
, op
)->base
;
4095 fd_ivopts_data
= data
;
4096 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4100 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4101 return cost
!= INFTY
;
4104 /* Checks whether it is possible to replace the final value of USE by
4105 a direct computation. If so, the formula is stored to *VALUE. */
4108 may_replace_final_value (struct ivopts_data
*data
, struct iv_use
*use
,
4111 struct loop
*loop
= data
->current_loop
;
4113 struct tree_niter_desc
*niter
;
4115 exit
= single_dom_exit (loop
);
4119 gcc_assert (dominated_by_p (CDI_DOMINATORS
, exit
->src
,
4120 bb_for_stmt (use
->stmt
)));
4122 niter
= niter_for_single_dom_exit (data
);
4124 || !zero_p (niter
->may_be_zero
))
4127 *value
= iv_value (use
->iv
, niter
->niter
);
4132 /* Determines cost of replacing final value of USE using CAND. */
4135 determine_use_iv_cost_outer (struct ivopts_data
*data
,
4136 struct iv_use
*use
, struct iv_cand
*cand
)
4141 tree value
= NULL_TREE
;
4142 struct loop
*loop
= data
->current_loop
;
4144 /* The simple case first -- if we need to express value of the preserved
4145 original biv, the cost is 0. This also prevents us from counting the
4146 cost of increment twice -- once at this use and once in the cost of
4148 if (cand
->pos
== IP_ORIGINAL
4149 && cand
->incremented_at
== use
->stmt
)
4151 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
4157 if (!may_replace_final_value (data
, use
, &value
))
4159 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4164 cost
= force_var_cost (data
, value
, &depends_on
);
4166 cost
/= AVG_LOOP_NITER (loop
);
4168 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, value
);
4169 return cost
!= INFTY
;
4172 exit
= single_dom_exit (loop
);
4175 /* If there is just a single exit, we may use value of the candidate
4176 after we take it to determine the value of use. */
4177 cost
= get_computation_cost_at (data
, use
, cand
, false, &depends_on
,
4178 last_stmt (exit
->src
));
4180 cost
/= AVG_LOOP_NITER (loop
);
4184 /* Otherwise we just need to compute the iv. */
4185 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4188 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
4190 return cost
!= INFTY
;
4193 /* Determines cost of basing replacement of USE on CAND. Returns false
4194 if USE cannot be based on CAND. */
4197 determine_use_iv_cost (struct ivopts_data
*data
,
4198 struct iv_use
*use
, struct iv_cand
*cand
)
4202 case USE_NONLINEAR_EXPR
:
4203 return determine_use_iv_cost_generic (data
, use
, cand
);
4206 return determine_use_iv_cost_outer (data
, use
, cand
);
4209 return determine_use_iv_cost_address (data
, use
, cand
);
4212 return determine_use_iv_cost_condition (data
, use
, cand
);
4219 /* Determines costs of basing the use of the iv on an iv candidate. */
4222 determine_use_iv_costs (struct ivopts_data
*data
)
4226 struct iv_cand
*cand
;
4227 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4229 alloc_use_cost_map (data
);
4231 for (i
= 0; i
< n_iv_uses (data
); i
++)
4233 use
= iv_use (data
, i
);
4235 if (data
->consider_all_candidates
)
4237 for (j
= 0; j
< n_iv_cands (data
); j
++)
4239 cand
= iv_cand (data
, j
);
4240 determine_use_iv_cost (data
, use
, cand
);
4247 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4249 cand
= iv_cand (data
, j
);
4250 if (!determine_use_iv_cost (data
, use
, cand
))
4251 bitmap_set_bit (to_clear
, j
);
4254 /* Remove the candidates for that the cost is infinite from
4255 the list of related candidates. */
4256 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4257 bitmap_clear (to_clear
);
4261 BITMAP_FREE (to_clear
);
4263 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4265 fprintf (dump_file
, "Use-candidate costs:\n");
4267 for (i
= 0; i
< n_iv_uses (data
); i
++)
4269 use
= iv_use (data
, i
);
4271 fprintf (dump_file
, "Use %d:\n", i
);
4272 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4273 for (j
= 0; j
< use
->n_map_members
; j
++)
4275 if (!use
->cost_map
[j
].cand
4276 || use
->cost_map
[j
].cost
== INFTY
)
4279 fprintf (dump_file
, " %d\t%d\t",
4280 use
->cost_map
[j
].cand
->id
,
4281 use
->cost_map
[j
].cost
);
4282 if (use
->cost_map
[j
].depends_on
)
4283 bitmap_print (dump_file
,
4284 use
->cost_map
[j
].depends_on
, "","");
4285 fprintf (dump_file
, "\n");
4288 fprintf (dump_file
, "\n");
4290 fprintf (dump_file
, "\n");
4294 /* Determines cost of the candidate CAND. */
4297 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4299 unsigned cost_base
, cost_step
;
4308 /* There are two costs associated with the candidate -- its increment
4309 and its initialization. The second is almost negligible for any loop
4310 that rolls enough, so we take it just very little into account. */
4312 base
= cand
->iv
->base
;
4313 cost_base
= force_var_cost (data
, base
, NULL
);
4314 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4316 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4318 /* Prefer the original iv unless we may gain something by replacing it;
4319 this is not really relevant for artificial ivs created by other
4321 if (cand
->pos
== IP_ORIGINAL
4322 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4325 /* Prefer not to insert statements into latch unless there are some
4326 already (so that we do not create unnecessary jumps). */
4327 if (cand
->pos
== IP_END
4328 && empty_block_p (ip_end_pos (data
->current_loop
)))
4332 /* Determines costs of computation of the candidates. */
4335 determine_iv_costs (struct ivopts_data
*data
)
4339 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4341 fprintf (dump_file
, "Candidate costs:\n");
4342 fprintf (dump_file
, " cand\tcost\n");
4345 for (i
= 0; i
< n_iv_cands (data
); i
++)
4347 struct iv_cand
*cand
= iv_cand (data
, i
);
4349 determine_iv_cost (data
, cand
);
4351 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4352 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4355 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4356 fprintf (dump_file
, "\n");
4359 /* Calculates cost for having SIZE induction variables. */
4362 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4364 return global_cost_for_size (size
,
4365 loop_data (data
->current_loop
)->regs_used
,
4369 /* For each size of the induction variable set determine the penalty. */
4372 determine_set_costs (struct ivopts_data
*data
)
4376 struct loop
*loop
= data
->current_loop
;
4379 /* We use the following model (definitely improvable, especially the
4380 cost function -- TODO):
4382 We estimate the number of registers available (using MD data), name it A.
4384 We estimate the number of registers used by the loop, name it U. This
4385 number is obtained as the number of loop phi nodes (not counting virtual
4386 registers and bivs) + the number of variables from outside of the loop.
4388 We set a reserve R (free regs that are used for temporary computations,
4389 etc.). For now the reserve is a constant 3.
4391 Let I be the number of induction variables.
4393 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4394 make a lot of ivs without a reason).
4395 -- if A - R < U + I <= A, the cost is I * PRES_COST
4396 -- if U + I > A, the cost is I * PRES_COST and
4397 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4399 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4401 fprintf (dump_file
, "Global costs:\n");
4402 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4403 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4404 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4405 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4409 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4411 op
= PHI_RESULT (phi
);
4413 if (!is_gimple_reg (op
))
4416 if (get_iv (data
, op
))
4422 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4424 struct version_info
*info
= ver_info (data
, j
);
4426 if (info
->inv_id
&& info
->has_nonlin_use
)
4430 loop_data (loop
)->regs_used
= n
;
4431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4432 fprintf (dump_file
, " regs_used %d\n", n
);
4434 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4436 fprintf (dump_file
, " cost for size:\n");
4437 fprintf (dump_file
, " ivs\tcost\n");
4438 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4439 fprintf (dump_file
, " %d\t%d\n", j
,
4440 ivopts_global_cost_for_size (data
, j
));
4441 fprintf (dump_file
, "\n");
4445 /* Returns true if A is a cheaper cost pair than B. */
4448 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4456 if (a
->cost
< b
->cost
)
4459 if (a
->cost
> b
->cost
)
4462 /* In case the costs are the same, prefer the cheaper candidate. */
4463 if (a
->cand
->cost
< b
->cand
->cost
)
4469 /* Computes the cost field of IVS structure. */
4472 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4476 cost
+= ivs
->cand_use_cost
;
4477 cost
+= ivs
->cand_cost
;
4478 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4483 /* Remove invariants in set INVS to set IVS. */
4486 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4494 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4496 ivs
->n_invariant_uses
[iid
]--;
4497 if (ivs
->n_invariant_uses
[iid
] == 0)
4502 /* Set USE not to be expressed by any candidate in IVS. */
4505 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4508 unsigned uid
= use
->id
, cid
;
4509 struct cost_pair
*cp
;
4511 cp
= ivs
->cand_for_use
[uid
];
4517 ivs
->cand_for_use
[uid
] = NULL
;
4518 ivs
->n_cand_uses
[cid
]--;
4520 if (ivs
->n_cand_uses
[cid
] == 0)
4522 bitmap_clear_bit (ivs
->cands
, cid
);
4523 /* Do not count the pseudocandidates. */
4527 ivs
->cand_cost
-= cp
->cand
->cost
;
4529 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4532 ivs
->cand_use_cost
-= cp
->cost
;
4534 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4535 iv_ca_recount_cost (data
, ivs
);
4538 /* Add invariants in set INVS to set IVS. */
4541 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4549 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4551 ivs
->n_invariant_uses
[iid
]++;
4552 if (ivs
->n_invariant_uses
[iid
] == 1)
4557 /* Set cost pair for USE in set IVS to CP. */
4560 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4561 struct iv_use
*use
, struct cost_pair
*cp
)
4563 unsigned uid
= use
->id
, cid
;
4565 if (ivs
->cand_for_use
[uid
] == cp
)
4568 if (ivs
->cand_for_use
[uid
])
4569 iv_ca_set_no_cp (data
, ivs
, use
);
4576 ivs
->cand_for_use
[uid
] = cp
;
4577 ivs
->n_cand_uses
[cid
]++;
4578 if (ivs
->n_cand_uses
[cid
] == 1)
4580 bitmap_set_bit (ivs
->cands
, cid
);
4581 /* Do not count the pseudocandidates. */
4585 ivs
->cand_cost
+= cp
->cand
->cost
;
4587 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4590 ivs
->cand_use_cost
+= cp
->cost
;
4591 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4592 iv_ca_recount_cost (data
, ivs
);
4596 /* Extend set IVS by expressing USE by some of the candidates in it
4600 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4603 struct cost_pair
*best_cp
= NULL
, *cp
;
4607 gcc_assert (ivs
->upto
>= use
->id
);
4609 if (ivs
->upto
== use
->id
)
4615 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4617 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4619 if (cheaper_cost_pair (cp
, best_cp
))
4623 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4626 /* Get cost for assignment IVS. */
4629 iv_ca_cost (struct iv_ca
*ivs
)
4631 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4634 /* Returns true if all dependences of CP are among invariants in IVS. */
4637 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4642 if (!cp
->depends_on
)
4645 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4647 if (ivs
->n_invariant_uses
[i
] == 0)
4654 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4655 it before NEXT_CHANGE. */
4657 static struct iv_ca_delta
*
4658 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4659 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4661 struct iv_ca_delta
*change
= xmalloc (sizeof (struct iv_ca_delta
));
4664 change
->old_cp
= old_cp
;
4665 change
->new_cp
= new_cp
;
4666 change
->next_change
= next_change
;
4671 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4674 static struct iv_ca_delta
*
4675 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4677 struct iv_ca_delta
*last
;
4685 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4687 last
->next_change
= l2
;
4692 /* Returns candidate by that USE is expressed in IVS. */
4694 static struct cost_pair
*
4695 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4697 return ivs
->cand_for_use
[use
->id
];
4700 /* Reverse the list of changes DELTA, forming the inverse to it. */
4702 static struct iv_ca_delta
*
4703 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4705 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4706 struct cost_pair
*tmp
;
4708 for (act
= delta
; act
; act
= next
)
4710 next
= act
->next_change
;
4711 act
->next_change
= prev
;
4715 act
->old_cp
= act
->new_cp
;
4722 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4723 reverted instead. */
4726 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4727 struct iv_ca_delta
*delta
, bool forward
)
4729 struct cost_pair
*from
, *to
;
4730 struct iv_ca_delta
*act
;
4733 delta
= iv_ca_delta_reverse (delta
);
4735 for (act
= delta
; act
; act
= act
->next_change
)
4739 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4740 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4744 iv_ca_delta_reverse (delta
);
4747 /* Returns true if CAND is used in IVS. */
4750 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4752 return ivs
->n_cand_uses
[cand
->id
] > 0;
4755 /* Returns number of induction variable candidates in the set IVS. */
4758 iv_ca_n_cands (struct iv_ca
*ivs
)
4760 return ivs
->n_cands
;
4763 /* Free the list of changes DELTA. */
4766 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4768 struct iv_ca_delta
*act
, *next
;
4770 for (act
= *delta
; act
; act
= next
)
4772 next
= act
->next_change
;
4779 /* Allocates new iv candidates assignment. */
4781 static struct iv_ca
*
4782 iv_ca_new (struct ivopts_data
*data
)
4784 struct iv_ca
*nw
= xmalloc (sizeof (struct iv_ca
));
4788 nw
->cand_for_use
= xcalloc (n_iv_uses (data
), sizeof (struct cost_pair
*));
4789 nw
->n_cand_uses
= xcalloc (n_iv_cands (data
), sizeof (unsigned));
4790 nw
->cands
= BITMAP_ALLOC (NULL
);
4793 nw
->cand_use_cost
= 0;
4795 nw
->n_invariant_uses
= xcalloc (data
->max_inv_id
+ 1, sizeof (unsigned));
4801 /* Free memory occupied by the set IVS. */
4804 iv_ca_free (struct iv_ca
**ivs
)
4806 free ((*ivs
)->cand_for_use
);
4807 free ((*ivs
)->n_cand_uses
);
4808 BITMAP_FREE ((*ivs
)->cands
);
4809 free ((*ivs
)->n_invariant_uses
);
4814 /* Dumps IVS to FILE. */
4817 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4819 const char *pref
= " invariants ";
4822 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4823 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4825 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4826 if (ivs
->n_invariant_uses
[i
])
4828 fprintf (file
, "%s%d", pref
, i
);
4831 fprintf (file
, "\n");
4834 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4835 new set, and store differences in DELTA. Number of induction variables
4836 in the new set is stored to N_IVS. */
4839 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4840 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4845 struct cost_pair
*old_cp
, *new_cp
;
4848 for (i
= 0; i
< ivs
->upto
; i
++)
4850 use
= iv_use (data
, i
);
4851 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4854 && old_cp
->cand
== cand
)
4857 new_cp
= get_use_iv_cost (data
, use
, cand
);
4861 if (!iv_ca_has_deps (ivs
, new_cp
))
4864 if (!cheaper_cost_pair (new_cp
, old_cp
))
4867 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4870 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4871 cost
= iv_ca_cost (ivs
);
4873 *n_ivs
= iv_ca_n_cands (ivs
);
4874 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4879 /* Try narrowing set IVS by removing CAND. Return the cost of
4880 the new set and store the differences in DELTA. */
4883 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4884 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4888 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4890 struct iv_cand
*cnd
;
4894 for (i
= 0; i
< n_iv_uses (data
); i
++)
4896 use
= iv_use (data
, i
);
4898 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4899 if (old_cp
->cand
!= cand
)
4904 if (data
->consider_all_candidates
)
4906 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4911 cnd
= iv_cand (data
, ci
);
4913 cp
= get_use_iv_cost (data
, use
, cnd
);
4916 if (!iv_ca_has_deps (ivs
, cp
))
4919 if (!cheaper_cost_pair (cp
, new_cp
))
4927 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4932 cnd
= iv_cand (data
, ci
);
4934 cp
= get_use_iv_cost (data
, use
, cnd
);
4937 if (!iv_ca_has_deps (ivs
, cp
))
4940 if (!cheaper_cost_pair (cp
, new_cp
))
4949 iv_ca_delta_free (delta
);
4953 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4956 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4957 cost
= iv_ca_cost (ivs
);
4958 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4963 /* Try optimizing the set of candidates IVS by removing candidates different
4964 from to EXCEPT_CAND from it. Return cost of the new set, and store
4965 differences in DELTA. */
4968 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4969 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4972 struct iv_ca_delta
*act_delta
, *best_delta
;
4973 unsigned i
, best_cost
, acost
;
4974 struct iv_cand
*cand
;
4977 best_cost
= iv_ca_cost (ivs
);
4979 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4981 cand
= iv_cand (data
, i
);
4983 if (cand
== except_cand
)
4986 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4988 if (acost
< best_cost
)
4991 iv_ca_delta_free (&best_delta
);
4992 best_delta
= act_delta
;
4995 iv_ca_delta_free (&act_delta
);
5004 /* Recurse to possibly remove other unnecessary ivs. */
5005 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5006 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5007 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5008 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5012 /* Tries to extend the sets IVS in the best possible way in order
5013 to express the USE. */
5016 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5019 unsigned best_cost
, act_cost
;
5022 struct iv_cand
*cand
;
5023 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5024 struct cost_pair
*cp
;
5026 iv_ca_add_use (data
, ivs
, use
);
5027 best_cost
= iv_ca_cost (ivs
);
5029 cp
= iv_ca_cand_for_use (ivs
, use
);
5032 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5033 iv_ca_set_no_cp (data
, ivs
, use
);
5036 /* First try important candidates. Only if it fails, try the specific ones.
5037 Rationale -- in loops with many variables the best choice often is to use
5038 just one generic biv. If we added here many ivs specific to the uses,
5039 the optimization algorithm later would be likely to get stuck in a local
5040 minimum, thus causing us to create too many ivs. The approach from
5041 few ivs to more seems more likely to be successful -- starting from few
5042 ivs, replacing an expensive use by a specific iv should always be a
5044 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5046 cand
= iv_cand (data
, i
);
5048 if (iv_ca_cand_used_p (ivs
, cand
))
5051 cp
= get_use_iv_cost (data
, use
, cand
);
5055 iv_ca_set_cp (data
, ivs
, use
, cp
);
5056 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5057 iv_ca_set_no_cp (data
, ivs
, use
);
5058 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5060 if (act_cost
< best_cost
)
5062 best_cost
= act_cost
;
5064 iv_ca_delta_free (&best_delta
);
5065 best_delta
= act_delta
;
5068 iv_ca_delta_free (&act_delta
);
5071 if (best_cost
== INFTY
)
5073 for (i
= 0; i
< use
->n_map_members
; i
++)
5075 cp
= use
->cost_map
+ i
;
5080 /* Already tried this. */
5081 if (cand
->important
)
5084 if (iv_ca_cand_used_p (ivs
, cand
))
5088 iv_ca_set_cp (data
, ivs
, use
, cp
);
5089 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5090 iv_ca_set_no_cp (data
, ivs
, use
);
5091 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5094 if (act_cost
< best_cost
)
5096 best_cost
= act_cost
;
5099 iv_ca_delta_free (&best_delta
);
5100 best_delta
= act_delta
;
5103 iv_ca_delta_free (&act_delta
);
5107 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5108 iv_ca_delta_free (&best_delta
);
5110 return (best_cost
!= INFTY
);
5113 /* Finds an initial assignment of candidates to uses. */
5115 static struct iv_ca
*
5116 get_initial_solution (struct ivopts_data
*data
)
5118 struct iv_ca
*ivs
= iv_ca_new (data
);
5121 for (i
= 0; i
< n_iv_uses (data
); i
++)
5122 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5131 /* Tries to improve set of induction variables IVS. */
5134 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5136 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5137 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5138 struct iv_cand
*cand
;
5140 /* Try extending the set of induction variables by one. */
5141 for (i
= 0; i
< n_iv_cands (data
); i
++)
5143 cand
= iv_cand (data
, i
);
5145 if (iv_ca_cand_used_p (ivs
, cand
))
5148 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5152 /* If we successfully added the candidate and the set is small enough,
5153 try optimizing it by removing other candidates. */
5154 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5156 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5157 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5158 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5159 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5162 if (acost
< best_cost
)
5165 iv_ca_delta_free (&best_delta
);
5166 best_delta
= act_delta
;
5169 iv_ca_delta_free (&act_delta
);
5174 /* Try removing the candidates from the set instead. */
5175 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5177 /* Nothing more we can do. */
5182 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5183 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5184 iv_ca_delta_free (&best_delta
);
5188 /* Attempts to find the optimal set of induction variables. We do simple
5189 greedy heuristic -- we try to replace at most one candidate in the selected
5190 solution and remove the unused ivs while this improves the cost. */
5192 static struct iv_ca
*
5193 find_optimal_iv_set (struct ivopts_data
*data
)
5199 /* Get the initial solution. */
5200 set
= get_initial_solution (data
);
5203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5204 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5210 fprintf (dump_file
, "Initial set of candidates:\n");
5211 iv_ca_dump (data
, dump_file
, set
);
5214 while (try_improve_iv_set (data
, set
))
5216 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5218 fprintf (dump_file
, "Improved to:\n");
5219 iv_ca_dump (data
, dump_file
, set
);
5223 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5224 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5226 for (i
= 0; i
< n_iv_uses (data
); i
++)
5228 use
= iv_use (data
, i
);
5229 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5235 /* Creates a new induction variable corresponding to CAND. */
5238 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5240 block_stmt_iterator incr_pos
;
5250 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5254 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5259 /* Mark that the iv is preserved. */
5260 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5261 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5263 /* Rewrite the increment so that it uses var_before directly. */
5264 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5269 gimple_add_tmp_var (cand
->var_before
);
5270 add_referenced_tmp_var (cand
->var_before
);
5272 base
= unshare_expr (cand
->iv
->base
);
5274 create_iv (base
, unshare_expr (cand
->iv
->step
),
5275 cand
->var_before
, data
->current_loop
,
5276 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5279 /* Creates new induction variables described in SET. */
5282 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5285 struct iv_cand
*cand
;
5288 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5290 cand
= iv_cand (data
, i
);
5291 create_new_iv (data
, cand
);
5295 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5296 is true, remove also the ssa name defined by the statement. */
5299 remove_statement (tree stmt
, bool including_defined_name
)
5301 if (TREE_CODE (stmt
) == PHI_NODE
)
5303 if (!including_defined_name
)
5305 /* Prevent the ssa name defined by the statement from being removed. */
5306 SET_PHI_RESULT (stmt
, NULL
);
5308 remove_phi_node (stmt
, NULL_TREE
);
5312 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5318 /* Rewrites USE (definition of iv used in a nonlinear expression)
5319 using candidate CAND. */
5322 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5323 struct iv_use
*use
, struct iv_cand
*cand
)
5326 tree op
, stmts
, tgt
, ass
;
5327 block_stmt_iterator bsi
, pbsi
;
5329 /* An important special case -- if we are asked to express value of
5330 the original iv by itself, just exit; there is no need to
5331 introduce a new computation (that might also need casting the
5332 variable to unsigned and back). */
5333 if (cand
->pos
== IP_ORIGINAL
5334 && cand
->incremented_at
== use
->stmt
)
5336 tree step
, ctype
, utype
;
5337 enum tree_code incr_code
= PLUS_EXPR
;
5339 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5340 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5342 step
= cand
->iv
->step
;
5343 ctype
= TREE_TYPE (step
);
5344 utype
= TREE_TYPE (cand
->var_after
);
5345 if (TREE_CODE (step
) == NEGATE_EXPR
)
5347 incr_code
= MINUS_EXPR
;
5348 step
= TREE_OPERAND (step
, 0);
5351 /* Check whether we may leave the computation unchanged.
5352 This is the case only if it does not rely on other
5353 computations in the loop -- otherwise, the computation
5354 we rely upon may be removed in remove_unused_ivs,
5355 thus leading to ICE. */
5356 op
= TREE_OPERAND (use
->stmt
, 1);
5357 if (TREE_CODE (op
) == PLUS_EXPR
5358 || TREE_CODE (op
) == MINUS_EXPR
)
5360 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5361 op
= TREE_OPERAND (op
, 1);
5362 else if (TREE_CODE (op
) == PLUS_EXPR
5363 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5364 op
= TREE_OPERAND (op
, 0);
5372 && (TREE_CODE (op
) == INTEGER_CST
5373 || operand_equal_p (op
, step
, 0)))
5376 /* Otherwise, add the necessary computations to express
5378 op
= fold_convert (ctype
, cand
->var_before
);
5379 comp
= fold_convert (utype
,
5380 build2 (incr_code
, ctype
, op
,
5381 unshare_expr (step
)));
5384 comp
= get_computation (data
->current_loop
, use
, cand
);
5386 switch (TREE_CODE (use
->stmt
))
5389 tgt
= PHI_RESULT (use
->stmt
);
5391 /* If we should keep the biv, do not replace it. */
5392 if (name_info (data
, tgt
)->preserve_biv
)
5395 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5396 while (!bsi_end_p (pbsi
)
5397 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5405 tgt
= TREE_OPERAND (use
->stmt
, 0);
5406 bsi
= bsi_for_stmt (use
->stmt
);
5413 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5415 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5418 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5419 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5420 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5421 remove_statement (use
->stmt
, false);
5422 SSA_NAME_DEF_STMT (tgt
) = ass
;
5427 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5428 TREE_OPERAND (use
->stmt
, 1) = op
;
5432 /* Replaces ssa name in index IDX by its basic variable. Callback for
5436 idx_remove_ssa_names (tree base
, tree
*idx
,
5437 void *data ATTRIBUTE_UNUSED
)
5441 if (TREE_CODE (*idx
) == SSA_NAME
)
5442 *idx
= SSA_NAME_VAR (*idx
);
5444 if (TREE_CODE (base
) == ARRAY_REF
)
5446 op
= &TREE_OPERAND (base
, 2);
5448 && TREE_CODE (*op
) == SSA_NAME
)
5449 *op
= SSA_NAME_VAR (*op
);
5450 op
= &TREE_OPERAND (base
, 3);
5452 && TREE_CODE (*op
) == SSA_NAME
)
5453 *op
= SSA_NAME_VAR (*op
);
5459 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5462 unshare_and_remove_ssa_names (tree ref
)
5464 ref
= unshare_expr (ref
);
5465 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5470 /* Extract the alias analysis info for the memory reference REF. There are
5471 several ways how this information may be stored and what precisely is
5472 its semantics depending on the type of the reference, but there always is
5473 somewhere hidden one _DECL node that is used to determine the set of
5474 virtual operands for the reference. The code below deciphers this jungle
5475 and extracts this single useful piece of information. */
5478 get_ref_tag (tree ref
)
5480 tree var
= get_base_address (ref
);
5486 if (TREE_CODE (var
) == INDIRECT_REF
)
5488 /* In case the base is a dereference of a pointer, first check its name
5489 mem tag, and if it does not have one, use type mem tag. */
5490 var
= TREE_OPERAND (var
, 0);
5491 if (TREE_CODE (var
) != SSA_NAME
)
5494 if (SSA_NAME_PTR_INFO (var
))
5496 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5501 var
= SSA_NAME_VAR (var
);
5502 tag
= var_ann (var
)->type_mem_tag
;
5503 gcc_assert (tag
!= NULL_TREE
);
5511 tag
= var_ann (var
)->type_mem_tag
;
5519 /* Copies the reference information from OLD_REF to NEW_REF. */
5522 copy_ref_info (tree new_ref
, tree old_ref
)
5524 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5525 copy_mem_ref_info (new_ref
, old_ref
);
5528 TMR_TAG (new_ref
) = get_ref_tag (old_ref
);
5529 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5533 /* Rewrites USE (address that is an iv) using candidate CAND. */
5536 rewrite_use_address (struct ivopts_data
*data
,
5537 struct iv_use
*use
, struct iv_cand
*cand
)
5539 struct affine_tree_combination aff
;
5540 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5543 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5544 unshare_aff_combination (&aff
);
5546 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5547 copy_ref_info (ref
, *use
->op_p
);
5551 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5555 rewrite_use_compare (struct ivopts_data
*data
,
5556 struct iv_use
*use
, struct iv_cand
*cand
)
5559 tree
*op_p
, cond
, op
, stmts
, bound
;
5560 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5561 enum tree_code compare
;
5562 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5567 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5568 tree var_type
= TREE_TYPE (var
);
5570 compare
= iv_elimination_compare (data
, use
);
5571 bound
= fold_convert (var_type
, bound
);
5572 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5576 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5578 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5579 update_stmt (use
->stmt
);
5583 /* The induction variable elimination failed; just express the original
5585 comp
= get_computation (data
->current_loop
, use
, cand
);
5588 op_p
= &TREE_OPERAND (cond
, 0);
5589 if (TREE_CODE (*op_p
) != SSA_NAME
5590 || zero_p (get_iv (data
, *op_p
)->step
))
5591 op_p
= &TREE_OPERAND (cond
, 1);
5593 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5595 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5600 /* Ensure that operand *OP_P may be used at the end of EXIT without
5601 violating loop closed ssa form. */
5604 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
5607 struct loop
*def_loop
;
5610 use
= USE_FROM_PTR (op_p
);
5611 if (TREE_CODE (use
) != SSA_NAME
)
5614 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
5618 def_loop
= def_bb
->loop_father
;
5619 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
5622 /* Try finding a phi node that copies the value out of the loop. */
5623 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5624 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
5629 /* Create such a phi node. */
5630 tree new_name
= duplicate_ssa_name (use
, NULL
);
5632 phi
= create_phi_node (new_name
, exit
->dest
);
5633 SSA_NAME_DEF_STMT (new_name
) = phi
;
5634 add_phi_arg (phi
, use
, exit
);
5637 SET_USE (op_p
, PHI_RESULT (phi
));
5640 /* Ensure that operands of STMT may be used at the end of EXIT without
5641 violating loop closed ssa form. */
5644 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
5647 use_operand_p use_p
;
5649 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
5650 protect_loop_closed_ssa_form_use (exit
, use_p
);
5653 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5654 so that they are emitted on the correct place, and so that the loop closed
5655 ssa form is preserved. */
5658 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
5660 tree_stmt_iterator tsi
;
5661 block_stmt_iterator bsi
;
5662 tree phi
, stmt
, def
, next
;
5664 if (!single_pred_p (exit
->dest
))
5665 split_loop_exit_edge (exit
);
5667 /* Ensure there is label in exit->dest, so that we can
5669 tree_block_label (exit
->dest
);
5670 bsi
= bsi_after_labels (exit
->dest
);
5672 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
5674 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
5676 bsi_insert_after (&bsi
, tsi_stmt (tsi
), BSI_NEW_STMT
);
5677 protect_loop_closed_ssa_form (exit
, bsi_stmt (bsi
));
5682 bsi_insert_after (&bsi
, stmts
, BSI_NEW_STMT
);
5683 protect_loop_closed_ssa_form (exit
, bsi_stmt (bsi
));
5689 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
5691 next
= PHI_CHAIN (phi
);
5693 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
5695 def
= PHI_RESULT (phi
);
5696 remove_statement (phi
, false);
5697 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
5699 SSA_NAME_DEF_STMT (def
) = stmt
;
5700 bsi_insert_after (&bsi
, stmt
, BSI_CONTINUE_LINKING
);
5705 /* Rewrites the final value of USE (that is only needed outside of the loop)
5706 using candidate CAND. */
5709 rewrite_use_outer (struct ivopts_data
*data
,
5710 struct iv_use
*use
, struct iv_cand
*cand
)
5713 tree value
, op
, stmts
, tgt
;
5716 switch (TREE_CODE (use
->stmt
))
5719 tgt
= PHI_RESULT (use
->stmt
);
5722 tgt
= TREE_OPERAND (use
->stmt
, 0);
5728 exit
= single_dom_exit (data
->current_loop
);
5734 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5735 value
= unshare_expr (cp
->value
);
5738 value
= get_computation_at (data
->current_loop
,
5739 use
, cand
, last_stmt (exit
->src
));
5741 op
= force_gimple_operand (value
, &stmts
, true, SSA_NAME_VAR (tgt
));
5743 /* If we will preserve the iv anyway and we would need to perform
5744 some computation to replace the final value, do nothing. */
5745 if (stmts
&& name_info (data
, tgt
)->preserve_biv
)
5748 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5750 use_operand_p use_p
= PHI_ARG_DEF_PTR_FROM_EDGE (phi
, exit
);
5752 if (USE_FROM_PTR (use_p
) == tgt
)
5753 SET_USE (use_p
, op
);
5757 compute_phi_arg_on_exit (exit
, stmts
, op
);
5759 /* Enable removal of the statement. We cannot remove it directly,
5760 since we may still need the aliasing information attached to the
5761 ssa name defined by it. */
5762 name_info (data
, tgt
)->iv
->have_use_for
= false;
5766 /* If the variable is going to be preserved anyway, there is nothing to
5768 if (name_info (data
, tgt
)->preserve_biv
)
5771 /* Otherwise we just need to compute the iv. */
5772 rewrite_use_nonlinear_expr (data
, use
, cand
);
5775 /* Rewrites USE using candidate CAND. */
5778 rewrite_use (struct ivopts_data
*data
,
5779 struct iv_use
*use
, struct iv_cand
*cand
)
5783 case USE_NONLINEAR_EXPR
:
5784 rewrite_use_nonlinear_expr (data
, use
, cand
);
5788 rewrite_use_outer (data
, use
, cand
);
5792 rewrite_use_address (data
, use
, cand
);
5796 rewrite_use_compare (data
, use
, cand
);
5802 update_stmt (use
->stmt
);
5805 /* Rewrite the uses using the selected induction variables. */
5808 rewrite_uses (struct ivopts_data
*data
)
5811 struct iv_cand
*cand
;
5814 for (i
= 0; i
< n_iv_uses (data
); i
++)
5816 use
= iv_use (data
, i
);
5817 cand
= use
->selected
;
5820 rewrite_use (data
, use
, cand
);
5824 /* Removes the ivs that are not used after rewriting. */
5827 remove_unused_ivs (struct ivopts_data
*data
)
5832 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5834 struct version_info
*info
;
5836 info
= ver_info (data
, j
);
5838 && !zero_p (info
->iv
->step
)
5840 && !info
->iv
->have_use_for
5841 && !info
->preserve_biv
)
5842 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5846 /* Frees data allocated by the optimization of a single loop. */
5849 free_loop_data (struct ivopts_data
*data
)
5855 htab_empty (data
->niters
);
5857 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5859 struct version_info
*info
;
5861 info
= ver_info (data
, i
);
5865 info
->has_nonlin_use
= false;
5866 info
->preserve_biv
= false;
5869 bitmap_clear (data
->relevant
);
5870 bitmap_clear (data
->important_candidates
);
5872 for (i
= 0; i
< n_iv_uses (data
); i
++)
5874 struct iv_use
*use
= iv_use (data
, i
);
5877 BITMAP_FREE (use
->related_cands
);
5878 for (j
= 0; j
< use
->n_map_members
; j
++)
5879 if (use
->cost_map
[j
].depends_on
)
5880 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5881 free (use
->cost_map
);
5884 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5886 for (i
= 0; i
< n_iv_cands (data
); i
++)
5888 struct iv_cand
*cand
= iv_cand (data
, i
);
5892 if (cand
->depends_on
)
5893 BITMAP_FREE (cand
->depends_on
);
5896 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5898 if (data
->version_info_size
< num_ssa_names
)
5900 data
->version_info_size
= 2 * num_ssa_names
;
5901 free (data
->version_info
);
5902 data
->version_info
= xcalloc (data
->version_info_size
,
5903 sizeof (struct version_info
));
5906 data
->max_inv_id
= 0;
5908 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5909 SET_DECL_RTL (obj
, NULL_RTX
);
5911 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5914 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5918 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
5922 for (i
= 1; i
< loops
->num
; i
++)
5923 if (loops
->parray
[i
])
5925 free (loops
->parray
[i
]->aux
);
5926 loops
->parray
[i
]->aux
= NULL
;
5929 free_loop_data (data
);
5930 free (data
->version_info
);
5931 BITMAP_FREE (data
->relevant
);
5932 BITMAP_FREE (data
->important_candidates
);
5933 htab_delete (data
->niters
);
5935 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5936 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5937 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5940 /* Optimizes the LOOP. Returns true if anything changed. */
5943 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5945 bool changed
= false;
5946 struct iv_ca
*iv_ca
;
5949 data
->current_loop
= loop
;
5951 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5953 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5955 exit
= single_dom_exit (loop
);
5958 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5959 exit
->src
->index
, exit
->dest
->index
);
5960 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5961 fprintf (dump_file
, "\n");
5964 fprintf (dump_file
, "\n");
5967 /* For each ssa name determines whether it behaves as an induction variable
5969 if (!find_induction_variables (data
))
5972 /* Finds interesting uses (item 1). */
5973 find_interesting_uses (data
);
5974 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5977 /* Finds candidates for the induction variables (item 2). */
5978 find_iv_candidates (data
);
5980 /* Calculates the costs (item 3, part 1). */
5981 determine_use_iv_costs (data
);
5982 determine_iv_costs (data
);
5983 determine_set_costs (data
);
5985 /* Find the optimal set of induction variables (item 3, part 2). */
5986 iv_ca
= find_optimal_iv_set (data
);
5991 /* Create the new induction variables (item 4, part 1). */
5992 create_new_ivs (data
, iv_ca
);
5993 iv_ca_free (&iv_ca
);
5995 /* Rewrite the uses (item 4, part 2). */
5996 rewrite_uses (data
);
5998 /* Remove the ivs that are unused after rewriting. */
5999 remove_unused_ivs (data
);
6001 /* We have changed the structure of induction variables; it might happen
6002 that definitions in the scev database refer to some of them that were
6007 free_loop_data (data
);
6012 /* Main entry point. Optimizes induction variables in LOOPS. */
6015 tree_ssa_iv_optimize (struct loops
*loops
)
6018 struct ivopts_data data
;
6020 tree_ssa_iv_optimize_init (loops
, &data
);
6022 /* Optimize the loops starting with the innermost ones. */
6023 loop
= loops
->tree_root
;
6027 /* Scan the loops, inner ones first. */
6028 while (loop
!= loops
->tree_root
)
6030 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6031 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6033 tree_ssa_iv_optimize_loop (&data
, loop
);
6045 tree_ssa_iv_optimize_finalize (loops
, &data
);