/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
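/* For illustration only (this example is not part of the original sources):
   on a target with cheap pointer arithmetic, strength reduction may rewrite

     for (i = 0; i < n; i++)
       a[i] = 0;

   by choosing the candidate "&a[0] + sizeof (*a) * iteration" for the
   address use "&a[i]", giving code equivalent to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so that the multiplication hidden in "a[i]" disappears, and the old
   counter i can be eliminated if it has no other uses.  */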
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;   /* Number of registers used.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_OUTER,            /* The induction variable is used outside the loop.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
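/* For illustration (a hypothetical loop, not from the original sources),
   all four kinds of uses at once:

     for (i = 0; i < n; i++)     // i < n           -- USE_COMPARE
       {
         a[i] = 0;               // address &a[i]   -- USE_ADDRESS
         s += i * i;             // i * i           -- USE_NONLINEAR_EXPR
       }
     last = i;                   // i after the loop -- USE_OUTER  */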
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};
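/* For illustration (a sketch, not from the original sources), in a loop

     header:
       ...body...
       ivtmp = ivtmp + 1;        <-- IP_NORMAL: just before the exit test
       if (ivtmp < n) goto header;
     latch:                      <-- IP_END: increment placed here instead

   IP_ORIGINAL means the increment statement of an existing biv is kept
   and reused as is.  */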
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use_p;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

static edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
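/* A worked example of the inverse computation above (illustration only,
   not from the original sources): with BITS = 4 (arithmetic modulo 16)
   and b = 3, the loop computes inv = b^(2^3 - 1) = 3^7 = 2187 = 11
   (mod 16), and indeed 3 * 11 = 33 = 1 (mod 16).  This works because b is
   odd at that point, hence a unit in Z/2^bits, and the order of any unit
   divides 2^(bits - 2), so b^(2^(bits - 1)) = 1 and b^(2^(bits - 1) - 1)
   is the inverse of b.  */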
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  void **slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
                                                     exit, &nfe_desc->niter);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
                                sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = xcalloc (1, sizeof (struct iv));

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step, true))
    return NULL_TREE;

  if (zero_p (step))
    return NULL_TREE;

  return step;
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      break;
    }

  return false;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
                        tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
    return false;
  *base = expand_simple_operations (*base);

  if (contains_abnormal_ssa_name_p (*base)
      || contains_abnormal_ssa_name_p (*step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  tree base, step;

  if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), base, step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "  may be zero if ");
          print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it
   as TYPE.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
                                       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
                  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
        use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}
/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any useful
         induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_step = convert_step (dta->ivopts_data->current_loop,
                          sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
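/* For example (illustration only, not from the original sources): given
   EXPR = &a[4] where a is an array of 32-bit integers, strip_offset stores
   16 to *OFFSET and returns &a[0]; given EXPR = p + 7 for a pointer p, it
   stores 7 to *OFFSET and returns p.  */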
/* Returns variant of TYPE that can be used as base for different uses.
   For integer types, we return unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
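/* For example (illustration only, not from the original sources): for a
   use whose iv has base "&a[i0] + 4" and step 4, the calls above add the
   candidates "(&a[i0] + 4) + 4 * i", "0 + 4 * i" (marked important, since
   many uses can be based on it), and, with the constant offset stripped,
   "&a[i0] + 4 * i".  */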
/* Possibly adds pseudocandidate for replacing the final value of USE by
   a direct computation.  */

static void
add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
{
  struct tree_niter_desc *niter;

  /* We must know where we exit the loop and how many times does it roll.  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return;

  add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        case USE_OUTER:
          add_iv_value_candidates (data, use->iv, use);

          /* Additionally, add the pseudocandidate for the possibility to
             replace the final value by a direct computation.  */
          add_iv_outer_candidates (data, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that computing the modulus
             by it is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand, unsigned cost,
                 bitmap depends_on, tree value)
{
  unsigned i, s;

  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
        return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
                                      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
        return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
        return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    default:
      if (TREE_CODE (top) != INTEGER_CST
          || TREE_CODE (bot) != INTEGER_CST)
        return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
         the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
        {
          negate = true;
          bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
        }
      else
        negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
        {
          negate = !negate;
          top = fold_unary_to_constant (NEGATE_EXPR, type, top);
        }

      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
        return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
        res = fold_unary_to_constant (NEGATE_EXPR, type, res);

      return res;
    }
}
2668 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2669 unsigned HOST_WIDE_INT cst
)
2671 unsigned prec
= TYPE_PRECISION (type
);
2674 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2677 comb
->rest
= NULL_TREE
;
2678 comb
->offset
= cst
& comb
->mask
;
/* Sets COMB to single element ELT.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}
/* Scales COMB by SCALE.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
		       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
	j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
	{
	  comb->coefs[comb->n] = scale;
	  comb->elts[comb->n] = comb->rest;
	  comb->rest = NULL_TREE;
	  comb->n++;
	}
      else
	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
				  build_int_cst_type (comb->type, scale));
    }
}
/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
			 unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
	if (comb->coefs[i])
	  return;

	comb->n--;
	comb->coefs[i] = comb->coefs[comb->n];
	comb->elts[i] = comb->elts[comb->n];
	return;
      }

  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
		       fold_convert (comb->type, elt),
		       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
		     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}
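/* Worked example (illustrative): the combination
   { offset = 3, n = 2, elts = { i, j }, coefs = { 4, 1 }, rest = NULL }
   represents the affine expression 4 * i + j + 3.  Scaling it by 2 with
   aff_combination_scale yields 8 * i + 2 * j + 6; adding the combination
   for j - 1 with aff_combination_add yields 8 * i + 3 * j + 5.  All of the
   arithmetic is performed modulo 2^precision through MASK.  */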
/* Splits EXPR into an affine combination of parts.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  aff_combination_elt (comb, type, expr);
}
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;
      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);
      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}
/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}
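/* Illustrative round trip: decomposing (4 * i + 5) - (4 * i + 1) with
   tree_to_aff_combination, scaling the subtrahend by -1 and adding the two
   combinations cancels the coefficient of i, so aff_combination_to_tree
   rebuilds just the constant 4.  This is how redundant parts of iv
   computations get removed.  */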
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, tree at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  if (cst_and_fits_in_hwi (cstep)
      && cst_and_fits_in_hwi (ustep))
    {
      ustepi = int_cst_value (ustep);
      cstepi = int_cst_value (cstep);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider case when ustep divides cstep and the ratio is
	     a power of 2 (so that the division is fast to execute)?  We would
	     need to be much more careful with overflows etc. then.  */
	  return false;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep, cstep);
      if (!ratio)
	return false;

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
	 to integer_onep/integer_all_onesp, since the former ignores
	 TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
	ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
	ratioi = 1;
      else if (integer_all_onesp (ratio))
	ratioi = -1;
      else
	ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
	 should not be very common, and fold itself is not that bad either,
	 so making the aff. functions more complicated to handle this case
	 is not that urgent.  */
      if (ratioi == 1)
	{
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
	}
      else if (ratioi == -1)
	{
	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
	}
      else
	{
	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
	}

      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}
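/* Worked example (illustrative): for a use with base &a and step 4 (a byte
   offset into an array of 4-byte elements) and a candidate with base 0 and
   step 1, the ratio is 4 / 1 = 4, so the use becomes

     use = ubase - 4 * cbase + 4 * var = &a + 4 * var

   and the affine machinery folds the constant parts together.  */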
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}
/* Returns cost of multiplication by constant CST in MODE.  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
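/* Usage note (illustrative): the first call for a given (cst, mode) pair
   expands the multiplication and measures the resulting insn sequence;
   later calls with the same pair, e.g. repeated queries for multiplication
   by an array element size while costing address uses, are answered
   directly from the hash table.  */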
/* Returns true if multiplying by RATIO is allowed in address.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (Pmode, addr))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i))
	  {
	    rat = i;
	    break;
	  }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  gen_int_mode (off, Pmode)));
	}
      else if (offset_p)
	base = gen_int_mode (off, Pmode);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							    2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
				depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
	return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);
      if (!rat)
	return INFTY;

      if (cst_and_fits_in_hwi (rat))
	ratio = int_cst_value (rat);
      else if (integer_onep (rat))
	ratio = 1;
      else if (integer_all_onesp (rat))
	ratio = -1;
      else
	return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on);
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Computes value of induction variable IV in iteration NITER.  */

static tree
iv_value (struct iv *iv, tree niter)
{
  tree val;
  tree type = TREE_TYPE (iv->base);

  niter = fold_convert (type, niter);
  val = fold (build2 (MULT_EXPR, type, iv->step, niter));

  return fold (build2 (PLUS_EXPR, type, iv->base, val));
}
/* Computes value of candidate CAND at position AT in iteration NITER.  */

static tree
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
{
  tree val = iv_value (cand->iv, niter);
  tree type = TREE_TYPE (cand->iv->base);

  if (stmt_after_increment (loop, cand, at))
    val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));

  return val;
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     the step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
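/* Worked example (illustrative): for a 32-bit iv with step 4,
   num_ending_zeros is 2, so the period is the mask of the low 30 bits,
   i.e. 2^30 - 1.  After that many increments the iv wraps around to a
   value it has already taken, so a bound test based on it is only valid
   while the iteration count does not exceed the period.  */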
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  struct tree_niter_desc *niter;
  tree nit, nit_type;
  tree wider_type, period, per_type;
  struct loop *loop = data->current_loop;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.  TODO -- extend
     for other conditions inside loop body.  */
  ex_bb = bb_for_stmt (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || TREE_CODE (use->stmt) != COND_EXPR)
    return false;
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  niter = niter_for_exit (data, exit);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  nit = niter->niter;
  nit_type = TREE_TYPE (nit);

  /* Determine whether we may use the variable to test whether niter iterations
     elapsed.  This is the case iff the period of the induction variable is
     greater than the number of iterations.  */
  period = iv_period (cand->iv);
  per_type = TREE_TYPE (period);

  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
    wider_type = per_type;
  else
    wider_type = nit_type;

  if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
				       fold_convert (wider_type, period),
				       fold_convert (wider_type, nit)))))
    return false;

  *bound = cand_value_at (loop, cand, use->stmt, nit);
  return true;
}
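/* Example (illustrative): for a loop exiting after 100 iterations and a
   candidate pointer iv with base &a[0] and step 4 (4-byte elements),
   cand_value_at yields &a[100], so the exit test on the original counter
   can be replaced by comparing the pointer against &a[100], with EQ or NE
   chosen by iv_elimination_compare above.  */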
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
	op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
	{
	  op = get_iv (data, op)->base;
	  fd_ivopts_data = data;
	  walk_tree (&op, find_depends, &depends_on, NULL);
	}
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  *value = iv_value (use->iv, niter->niter);

  return true;
}
/* Determines cost of replacing final value of USE using CAND.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_OUTER:
      return determine_use_iv_cost_outer (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap_iterator bi;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     optimization passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  return global_cost_for_size (size,
			       loop_data (data->current_loop)->regs_used,
			       n_iv_uses (data));
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
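  /* Worked example (illustrative): with A = 8 available registers, R = 3
     reserved, U = 4 registers used by the loop and I = 1 iv, we have
     U + I + R = 8 <= A, so the cost is just 1 * SMALL_COST.  With I = 3,
     U + I = 7 satisfies A - R < U + I <= A, so the cost grows to
     3 * PRES_COST, discouraging the extra ivs.  */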
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (a->cost > b->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned cost = 0;

  cost += ivs->cand_use_cost;
  cost += ivs->cand_cost;
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}
/* Remove invariants in set INVS to set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are not allowed to be used afterwards.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Returns candidate by that USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
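/* Usage sketch (illustrative): a caller builds a tentative change list,
   applies it, reads the resulting cost, and rolls it back:

     delta = iv_ca_delta_add (use, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   which is exactly the pattern used by iv_ca_extend below.  */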
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
  nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
  nw->cost = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
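
/* As a hypothetical illustration (not taken from any particular testcase):
   for a loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   the initial solution may assign a separate candidate to each use (the two
   addresses and the exit test).  Successive calls to try_improve_iv_set may
   then discover that a single pointer-sized candidate serves all three uses
   more cheaply, shrinking the set step by step until no single replacement
   improves the cost any further.  */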
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
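
/* A placement sketch (hypothetical loop shape, for illustration only):

     IP_NORMAL                     IP_END
     loop:                         loop:
       body                          exit test
       iv += step                    body
       exit test                     iv += step

   that is, IP_NORMAL emits the increment immediately before the exit test
   of the loop, while IP_END emits it at the end of the latch, after the
   test.  For IP_ORIGINAL nothing is emitted at all; the increment of the
   original biv is reused, as handled above.  */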
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
        {
          /* Prevent the ssa name defined by the statement from being removed.  */
          SET_PHI_RESULT (stmt, NULL);
        }
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case the variable is expressed in some
         complicated way, rewrite it so that we may get rid of this
         complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
           || TREE_CODE (op) == MINUS_EXPR)
          && TREE_OPERAND (op, 0) == cand->var_before
          && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
        {
          bsi = pbsi;
          bsi_next (&pbsi);
        }
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
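
/* A hypothetical example of the IP_ORIGINAL shortcut above: if the candidate
   preserves the original biv i (var_before i_1, var_after i_7) and the use
   is the increment of that biv,

     i_7 = i_1 + 1;

   no new computation is emitted -- replacing the right-hand side by an
   expression derived from the candidate would only recompute a value we
   already have in the cheapest possible form.  */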
/* Replaces the ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
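
/* For instance (hypothetical names): the reference a[i_13] becomes a[i],
   since SSA_NAME_VAR (i_13) is the base variable i.  The result is only
   used to keep track of the original reference (see TMR_ORIGINAL below),
   so losing the SSA version information is harmless here.  */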
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored, and what precisely its
   semantics is depends on the type of the reference, but there is always
   one _DECL node hidden somewhere that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  */

static tree
get_ref_tag (tree ref)
{
  tree var = get_base_address (ref);
  tree tag;

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      var = SSA_NAME_VAR (var);
    }

  if (DECL_P (var))
    {
      tag = var_ann (var)->type_mem_tag;
      if (tag)
        return tag;

      return var;
    }

  return NULL_TREE;
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_TAG (new_ref) = get_ref_tag (old_ref);
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
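
/* A hypothetical before/after sketch: an address use such as a[i_5], where
   i_5 steps by 1 over an array of 4-byte elements, may be rewritten into a
   TARGET_MEM_REF of the shape

     MEM[base: &a, index: i_5, step: 4]

   so that the backend can pick an addressing mode matching the chosen
   candidate instead of materializing the address arithmetic explicitly.  */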
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
                                 true, NULL_TREE);

      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
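
/* An elimination example (hypothetical): if the exit test is i_3 < 100 and
   the chosen candidate is a pointer p stepping over an array a, the
   condition may be rewritten as something like

     p_8 != &a[100]

   using the bound computed for the candidate, so that i itself is no longer
   needed.  When no bound is available, the fallback above merely
   re-expresses the compared iv in terms of the candidate.  */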
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  SET_USE (op_p, PHI_RESULT (phi));
}
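
/* Loop closed ssa form requires that a name defined inside a loop is used
   outside of it only through a phi node in the exit block.  E.g.
   (hypothetical names): if i_14 is defined in the loop and *OP_P refers to
   it at EXIT, we need

     i_15 = PHI <i_14 (exit edge)>

   in exit->dest and the operand must be rewritten to i_15; the code above
   either finds such a phi node or creates one.  */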
/* Ensure that operands of STMT may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form (edge exit, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    protect_loop_closed_ssa_form_use (exit, use_p);
}
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange
   things so that they are emitted in the correct place, and so that the
   loop closed ssa form is preserved.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
        {
          bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
          protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
        }
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
        {
          def = PHI_RESULT (phi);
          remove_statement (phi, false);
          stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
                         def, op);
          SSA_NAME_DEF_STMT (def) = stmt;
          bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
        }
    }
}
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
                   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree phi;
  tree value, op, stmts, tgt;

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      if (!cand->iv)
        {
          struct cost_pair *cp = get_use_iv_cost (data, use, cand);
          value = unshare_expr (cp->value);
        }
      else
        value = get_computation_at (data->current_loop,
                                    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
         some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
        return;

      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
        {
          use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

          if (USE_FROM_PTR (use_p) == tgt)
            SET_USE (use_p, op);
        }

      if (stmts)
        compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
         since we may still need the aliasing information attached to the
         ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
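
/* A final-value replacement sketch (hypothetical): for

     for (i = 0; i < n; i++)
       ...
     use (i);

   the value of i after the loop is simply n (when the iteration count is
   known to the analysis), so the phi argument on the single dominating exit
   can be replaced by n and the iv may then die inside the loop.  When there
   is no single exit, the value is instead preserved or recomputed via
   rewrite_use_nonlinear_expr as above.  */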
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data,
             struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
        rewrite_use_nonlinear_expr (data, use, cand);
        break;

      case USE_OUTER:
        rewrite_use_outer (data, use, cand);
        break;

      case USE_ADDRESS:
        rewrite_use_address (data, use, cand);
        break;

      case USE_COMPARE:
        rewrite_use_compare (data, use, cand);
        break;

      default:
        gcc_unreachable ();
    }
  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !zero_p (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = xcalloc (data->version_info_size,
                                    sizeof (struct version_info));
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
        free (loops->parray[i]->aux);
        loops->parray[i]->aux = NULL;
      }

  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
        {
          loop = loop->next;
          while (loop->inner)
            loop = loop->inner;
        }
      else
        loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (loops, &data);
}
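
/* End-to-end illustration (hypothetical, at the source level): taken
   together, the steps above can turn

     for (i = 0; i < n; i++)
       a[i] = 0;

   into the equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   by selecting a pointer candidate for the address use, rewriting the exit
   test to compare that candidate against a precomputed bound, and removing
   the original iv once it has no remaining uses.  */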