1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used
; /* Number of registers used. */
133 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
134 USE_ADDRESS
, /* Use in an address. */
135 USE_COMPARE
/* Use is a compare. */
138 /* The candidate - cost pair. */
141 struct iv_cand
*cand
; /* The candidate. */
142 unsigned cost
; /* The cost. */
143 bitmap depends_on
; /* The list of invariants that have to be
145 tree value
; /* For final value elimination, the expression for
146 the final value of the iv. For iv elimination,
147 the new bound to compare with. */
153 unsigned id
; /* The id of the use. */
154 enum use_type type
; /* Type of the use. */
155 struct iv
*iv
; /* The induction variable it is based on. */
156 tree stmt
; /* Statement in that it occurs. */
157 tree
*op_p
; /* The place where it occurs. */
158 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
161 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
162 struct cost_pair
*cost_map
;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand
*selected
;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
172 IP_NORMAL
, /* At the end, just before the exit condition. */
173 IP_END
, /* At the end of the latch block. */
174 IP_ORIGINAL
/* The original biv. */
177 /* The induction variable candidate. */
180 unsigned id
; /* The number of the candidate. */
181 bool important
; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos
; /* Where it is computed. */
184 tree incremented_at
; /* For original biv, the statement where it is
186 tree var_before
; /* The variable used for it before increment. */
187 tree var_after
; /* The variable used for it after increment. */
188 struct iv
*iv
; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost
; /* Cost of the candidate. */
193 bitmap depends_on
; /* The list of invariants that are used in step of the
197 /* The data used by the induction variable optimizations. */
199 typedef struct iv_use
*iv_use_p
;
201 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
203 typedef struct iv_cand
*iv_cand_p
;
204 DEF_VEC_P(iv_cand_p
);
205 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
209 /* The currently optimized loop. */
210 struct loop
*current_loop
;
212 /* Numbers of iterations for all exits of the current loop. */
215 /* The size of version_info array allocated. */
216 unsigned version_info_size
;
218 /* The array of information for the ssa names. */
219 struct version_info
*version_info
;
221 /* The bitmap of indices in version_info whose value was changed. */
224 /* The maximum invariant id. */
227 /* The uses of induction variables. */
228 VEC(iv_use_p
,heap
) *iv_uses
;
230 /* The candidates. */
231 VEC(iv_cand_p
,heap
) *iv_candidates
;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates
;
236 /* Whether to consider just related and important candidates when replacing a
238 bool consider_all_candidates
;
241 /* An assignment of iv candidates to uses. */
245 /* The number of uses covered by the assignment. */
248 /* Number of uses that cannot be expressed by the candidates in the set. */
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair
**cand_for_use
;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses
;
257 /* The candidates used. */
260 /* The number of candidates in the set. */
263 /* Total number of registers needed. */
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost
;
269 /* Total cost of candidates. */
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses
;
275 /* Total cost of the assignment. */
279 /* Difference of two iv candidate assignments. */
286 /* An old assignment (for rollback purposes). */
287 struct cost_pair
*old_cp
;
289 /* A new assignment. */
290 struct cost_pair
*new_cp
;
292 /* Next change in the list. */
293 struct iv_ca_delta
*next_change
;
296 /* Bound on number of candidates below that all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
302 optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, try removing unnecessary
308 ivs from the set always. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for that the decl_rtl field must be reset is stored
316 static VEC(tree
,heap
) *decl_rtl_to_reset
;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data
*data
)
323 return VEC_length (iv_use_p
, data
->iv_uses
);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use
*
329 iv_use (struct ivopts_data
*data
, unsigned i
)
331 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data
*data
)
339 return VEC_length (iv_cand_p
, data
->iv_candidates
);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand
*
345 iv_cand (struct ivopts_data
*data
, unsigned i
)
347 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
350 /* The data for LOOP. */
352 static inline struct loop_data
*
353 loop_data (struct loop
*loop
)
358 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 single_dom_exit (struct loop
*loop
)
363 edge exit
= loop
->single_exit
;
368 if (!just_once_each_iteration_p (loop
, exit
->src
))
374 /* Dumps information about the induction variable IV to FILE. */
376 extern void dump_iv (FILE *, struct iv
*);
378 dump_iv (FILE *file
, struct iv
*iv
)
382 fprintf (file
, "ssa name ");
383 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
384 fprintf (file
, "\n");
387 fprintf (file
, " type ");
388 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
389 fprintf (file
, "\n");
393 fprintf (file
, " base ");
394 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
395 fprintf (file
, "\n");
397 fprintf (file
, " step ");
398 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
399 fprintf (file
, "\n");
403 fprintf (file
, " invariant ");
404 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
405 fprintf (file
, "\n");
410 fprintf (file
, " base object ");
411 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
412 fprintf (file
, "\n");
416 fprintf (file
, " is a biv\n");
419 /* Dumps information about the USE to FILE. */
421 extern void dump_use (FILE *, struct iv_use
*);
423 dump_use (FILE *file
, struct iv_use
*use
)
425 fprintf (file
, "use %d\n", use
->id
);
429 case USE_NONLINEAR_EXPR
:
430 fprintf (file
, " generic\n");
434 fprintf (file
, " address\n");
438 fprintf (file
, " compare\n");
445 fprintf (file
, " in statement ");
446 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
447 fprintf (file
, "\n");
449 fprintf (file
, " at position ");
451 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
452 fprintf (file
, "\n");
454 dump_iv (file
, use
->iv
);
456 if (use
->related_cands
)
458 fprintf (file
, " related candidates ");
459 dump_bitmap (file
, use
->related_cands
);
463 /* Dumps information about the uses to FILE. */
465 extern void dump_uses (FILE *, struct ivopts_data
*);
467 dump_uses (FILE *file
, struct ivopts_data
*data
)
472 for (i
= 0; i
< n_iv_uses (data
); i
++)
474 use
= iv_use (data
, i
);
476 dump_use (file
, use
);
477 fprintf (file
, "\n");
481 /* Dumps information about induction variable candidate CAND to FILE. */
483 extern void dump_cand (FILE *, struct iv_cand
*);
485 dump_cand (FILE *file
, struct iv_cand
*cand
)
487 struct iv
*iv
= cand
->iv
;
489 fprintf (file
, "candidate %d%s\n",
490 cand
->id
, cand
->important
? " (important)" : "");
492 if (cand
->depends_on
)
494 fprintf (file
, " depends on ");
495 dump_bitmap (file
, cand
->depends_on
);
500 fprintf (file
, " final value replacement\n");
507 fprintf (file
, " incremented before exit test\n");
511 fprintf (file
, " incremented at end\n");
515 fprintf (file
, " original biv\n");
522 /* Returns the info for ssa version VER. */
524 static inline struct version_info
*
525 ver_info (struct ivopts_data
*data
, unsigned ver
)
527 return data
->version_info
+ ver
;
530 /* Returns the info for ssa name NAME. */
532 static inline struct version_info
*
533 name_info (struct ivopts_data
*data
, tree name
)
535 return ver_info (data
, SSA_NAME_VERSION (name
));
538 /* Checks whether there exists number X such that X * B = A, counting modulo
542 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
545 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
546 unsigned HOST_WIDE_INT inv
, ex
, val
;
552 /* First divide the whole equation by 2 as long as possible. */
553 while (!(a
& 1) && !(b
& 1))
563 /* If b is still even, a is odd and there is no such x. */
567 /* Find the inverse of b. We compute it as
568 b^(2^(bits - 1) - 1) (mod 2^bits). */
571 for (i
= 0; i
< bits
- 1; i
++)
573 inv
= (inv
* ex
) & mask
;
574 ex
= (ex
* ex
) & mask
;
577 val
= (a
* inv
) & mask
;
579 gcc_assert (((val
* b
) & mask
) == a
);
581 if ((val
>> (bits
- 1)) & 1)
589 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
593 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
595 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
599 if (sbb
== loop
->latch
)
605 return stmt
== last_stmt (bb
);
608 /* Returns true if STMT if after the place where the original induction
609 variable CAND is incremented. */
612 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
614 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
615 basic_block stmt_bb
= bb_for_stmt (stmt
);
616 block_stmt_iterator bsi
;
618 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
621 if (stmt_bb
!= cand_bb
)
624 /* Scan the block from the end, since the original ivs are usually
625 incremented at the end of the loop body. */
626 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
628 if (bsi_stmt (bsi
) == cand
->incremented_at
)
630 if (bsi_stmt (bsi
) == stmt
)
635 /* Returns true if STMT if after the place where the induction variable
636 CAND is incremented in LOOP. */
639 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
647 return stmt_after_ip_normal_pos (loop
, stmt
);
650 return stmt_after_ip_original_pos (cand
, stmt
);
657 /* Element of the table in that we cache the numbers of iterations obtained
658 from exits of the loop. */
662 /* The edge for that the number of iterations is cached. */
665 /* True if the # of iterations was successfully determined. */
668 /* Description of # of iterations. */
669 struct tree_niter_desc niter
;
672 /* Hash function for nfe_cache_elt E. */
675 nfe_hash (const void *e
)
677 const struct nfe_cache_elt
*elt
= e
;
679 return htab_hash_pointer (elt
->exit
);
682 /* Equality function for nfe_cache_elt E1 and edge E2. */
685 nfe_eq (const void *e1
, const void *e2
)
687 const struct nfe_cache_elt
*elt1
= e1
;
689 return elt1
->exit
== e2
;
692 /* Returns structure describing number of iterations determined from
693 EXIT of DATA->current_loop, or NULL if something goes wrong. */
695 static struct tree_niter_desc
*
696 niter_for_exit (struct ivopts_data
*data
, edge exit
)
698 struct nfe_cache_elt
*nfe_desc
;
701 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
702 htab_hash_pointer (exit
),
707 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
708 nfe_desc
->exit
= exit
;
709 nfe_desc
->valid_p
= number_of_iterations_exit (data
->current_loop
,
710 exit
, &nfe_desc
->niter
,
717 if (!nfe_desc
->valid_p
)
720 return &nfe_desc
->niter
;
723 /* Returns structure describing number of iterations determined from
724 single dominating exit of DATA->current_loop, or NULL if something
727 static struct tree_niter_desc
*
728 niter_for_single_dom_exit (struct ivopts_data
*data
)
730 edge exit
= single_dom_exit (data
->current_loop
);
735 return niter_for_exit (data
, exit
);
738 /* Initializes data structures used by the iv optimization pass, stored
739 in DATA. LOOPS is the loop tree. */
742 tree_ssa_iv_optimize_init (struct loops
*loops
, struct ivopts_data
*data
)
746 data
->version_info_size
= 2 * num_ssa_names
;
747 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
748 data
->relevant
= BITMAP_ALLOC (NULL
);
749 data
->important_candidates
= BITMAP_ALLOC (NULL
);
750 data
->max_inv_id
= 0;
751 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
753 for (i
= 1; i
< loops
->num
; i
++)
754 if (loops
->parray
[i
])
755 loops
->parray
[i
]->aux
= xcalloc (1, sizeof (struct loop_data
));
757 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
758 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
759 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
762 /* Returns a memory object to that EXPR points. In case we are able to
763 determine that it does not point to any such object, NULL is returned. */
766 determine_base_object (tree expr
)
768 enum tree_code code
= TREE_CODE (expr
);
769 tree base
, obj
, op0
, op1
;
771 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
780 obj
= TREE_OPERAND (expr
, 0);
781 base
= get_base_address (obj
);
786 if (TREE_CODE (base
) == INDIRECT_REF
)
787 return determine_base_object (TREE_OPERAND (base
, 0));
789 return fold_convert (ptr_type_node
,
790 build_fold_addr_expr (base
));
794 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
795 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
801 return (code
== PLUS_EXPR
803 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
805 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
809 return determine_base_object (TREE_OPERAND (expr
, 0));
812 return fold_convert (ptr_type_node
, expr
);
816 /* Allocates an induction variable with given initial value BASE and step STEP
820 alloc_iv (tree base
, tree step
)
822 struct iv
*iv
= XCNEW (struct iv
);
824 if (step
&& integer_zerop (step
))
828 iv
->base_object
= determine_base_object (base
);
831 iv
->have_use_for
= false;
833 iv
->ssa_name
= NULL_TREE
;
838 /* Sets STEP and BASE for induction variable IV. */
841 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
843 struct version_info
*info
= name_info (data
, iv
);
845 gcc_assert (!info
->iv
);
847 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
848 info
->iv
= alloc_iv (base
, step
);
849 info
->iv
->ssa_name
= iv
;
852 /* Finds induction variable declaration for VAR. */
855 get_iv (struct ivopts_data
*data
, tree var
)
859 if (!name_info (data
, var
)->iv
)
861 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
864 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
865 set_iv (data
, var
, var
, NULL_TREE
);
868 return name_info (data
, var
)->iv
;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
875 determine_biv_step (tree phi
)
877 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
878 tree name
= PHI_RESULT (phi
);
881 if (!is_gimple_reg (name
))
884 if (!simple_iv (loop
, phi
, name
, &iv
, true))
887 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
890 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
893 abnormal_ssa_name_p (tree exp
)
898 if (TREE_CODE (exp
) != SSA_NAME
)
901 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
904 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
905 abnormal phi node. Callback for for_each_index. */
908 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
909 void *data ATTRIBUTE_UNUSED
)
911 if (TREE_CODE (base
) == ARRAY_REF
)
913 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
915 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
919 return !abnormal_ssa_name_p (*index
);
922 /* Returns true if EXPR contains a ssa name that occurs in an
923 abnormal phi node. */
926 contains_abnormal_ssa_name_p (tree expr
)
929 enum tree_code_class
class;
934 code
= TREE_CODE (expr
);
935 class = TREE_CODE_CLASS (code
);
937 if (code
== SSA_NAME
)
938 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
940 if (code
== INTEGER_CST
941 || is_gimple_min_invariant (expr
))
944 if (code
== ADDR_EXPR
)
945 return !for_each_index (&TREE_OPERAND (expr
, 0),
946 idx_contains_abnormal_ssa_name_p
,
953 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
958 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
970 /* Finds basic ivs. */
973 find_bivs (struct ivopts_data
*data
)
975 tree phi
, step
, type
, base
;
977 struct loop
*loop
= data
->current_loop
;
979 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
981 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
984 step
= determine_biv_step (phi
);
988 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
989 base
= expand_simple_operations (base
);
990 if (contains_abnormal_ssa_name_p (base
)
991 || contains_abnormal_ssa_name_p (step
))
994 type
= TREE_TYPE (PHI_RESULT (phi
));
995 base
= fold_convert (type
, base
);
997 step
= fold_convert (type
, step
);
999 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1006 /* Marks basic ivs. */
1009 mark_bivs (struct ivopts_data
*data
)
1012 struct iv
*iv
, *incr_iv
;
1013 struct loop
*loop
= data
->current_loop
;
1014 basic_block incr_bb
;
1016 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1018 iv
= get_iv (data
, PHI_RESULT (phi
));
1022 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1023 incr_iv
= get_iv (data
, var
);
1027 /* If the increment is in the subloop, ignore it. */
1028 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1029 if (incr_bb
->loop_father
!= data
->current_loop
1030 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1034 incr_iv
->biv_p
= true;
1038 /* Checks whether STMT defines a linear induction variable and stores its
1039 parameters to IV. */
1042 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1045 struct loop
*loop
= data
->current_loop
;
1047 iv
->base
= NULL_TREE
;
1048 iv
->step
= NULL_TREE
;
1050 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1053 lhs
= TREE_OPERAND (stmt
, 0);
1054 if (TREE_CODE (lhs
) != SSA_NAME
)
1057 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1059 iv
->base
= expand_simple_operations (iv
->base
);
1061 if (contains_abnormal_ssa_name_p (iv
->base
)
1062 || contains_abnormal_ssa_name_p (iv
->step
))
1068 /* Finds general ivs in statement STMT. */
1071 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1075 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1078 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1081 /* Finds general ivs in basic block BB. */
1084 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1086 block_stmt_iterator bsi
;
1088 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1089 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1092 /* Finds general ivs. */
1095 find_givs (struct ivopts_data
*data
)
1097 struct loop
*loop
= data
->current_loop
;
1098 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1101 for (i
= 0; i
< loop
->num_nodes
; i
++)
1102 find_givs_in_bb (data
, body
[i
]);
1106 /* For each ssa name defined in LOOP determines whether it is an induction
1107 variable and if so, its initial value and step. */
1110 find_induction_variables (struct ivopts_data
*data
)
1115 if (!find_bivs (data
))
1121 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1123 struct tree_niter_desc
*niter
;
1125 niter
= niter_for_single_dom_exit (data
);
1129 fprintf (dump_file
, " number of iterations ");
1130 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1131 fprintf (dump_file
, "\n");
1133 fprintf (dump_file
, " may be zero if ");
1134 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1135 fprintf (dump_file
, "\n");
1136 fprintf (dump_file
, "\n");
1139 fprintf (dump_file
, "Induction variables:\n\n");
1141 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1143 if (ver_info (data
, i
)->iv
)
1144 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1151 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1153 static struct iv_use
*
1154 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1155 tree stmt
, enum use_type use_type
)
1157 struct iv_use
*use
= XCNEW (struct iv_use
);
1159 use
->id
= n_iv_uses (data
);
1160 use
->type
= use_type
;
1164 use
->related_cands
= BITMAP_ALLOC (NULL
);
1166 /* To avoid showing ssa name in the dumps, if it was not reset by the
1168 iv
->ssa_name
= NULL_TREE
;
1170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1171 dump_use (dump_file
, use
);
1173 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1178 /* Checks whether OP is a loop-level invariant and if so, records it.
1179 NONLINEAR_USE is true if the invariant is used in a way we do not
1180 handle specially. */
1183 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1186 struct version_info
*info
;
1188 if (TREE_CODE (op
) != SSA_NAME
1189 || !is_gimple_reg (op
))
1192 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1194 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1197 info
= name_info (data
, op
);
1199 info
->has_nonlin_use
|= nonlinear_use
;
1201 info
->inv_id
= ++data
->max_inv_id
;
1202 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1205 /* Checks whether the use OP is interesting and if so, records it. */
1207 static struct iv_use
*
1208 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1215 if (TREE_CODE (op
) != SSA_NAME
)
1218 iv
= get_iv (data
, op
);
1222 if (iv
->have_use_for
)
1224 use
= iv_use (data
, iv
->use_id
);
1226 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1230 if (zero_p (iv
->step
))
1232 record_invariant (data
, op
, true);
1235 iv
->have_use_for
= true;
1237 civ
= XNEW (struct iv
);
1240 stmt
= SSA_NAME_DEF_STMT (op
);
1241 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1242 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1244 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1245 iv
->use_id
= use
->id
;
1250 /* Checks whether the condition *COND_P in STMT is interesting
1251 and if so, records it. */
1254 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1258 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1260 tree zero
= integer_zero_node
;
1262 const_iv
.step
= NULL_TREE
;
1264 if (TREE_CODE (*cond_p
) != SSA_NAME
1265 && !COMPARISON_CLASS_P (*cond_p
))
1268 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1275 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1276 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1279 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1280 iv0
= get_iv (data
, *op0_p
);
1284 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1285 iv1
= get_iv (data
, *op1_p
);
1289 if (/* When comparing with non-invariant value, we may not do any senseful
1290 induction variable elimination. */
1292 /* Eliminating condition based on two ivs would be nontrivial.
1293 ??? TODO -- it is not really important to handle this case. */
1294 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1296 find_interesting_uses_op (data
, *op0_p
);
1297 find_interesting_uses_op (data
, *op1_p
);
1301 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1303 /* If both are invariants, this is a work for unswitching. */
1307 civ
= XNEW (struct iv
);
1308 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1309 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1312 /* Returns true if expression EXPR is obviously invariant in LOOP,
1313 i.e. if all its operands are defined outside of the LOOP. */
1316 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1321 if (is_gimple_min_invariant (expr
))
1324 if (TREE_CODE (expr
) == SSA_NAME
)
1326 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1328 && flow_bb_inside_loop_p (loop
, def_bb
))
1337 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1338 for (i
= 0; i
< len
; i
++)
1339 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1345 /* Cumulates the steps of indices into DATA and replaces their values with the
1346 initial ones. Returns false when the value of the index cannot be determined.
1347 Callback for for_each_index. */
1349 struct ifs_ivopts_data
1351 struct ivopts_data
*ivopts_data
;
1357 idx_find_step (tree base
, tree
*idx
, void *data
)
1359 struct ifs_ivopts_data
*dta
= data
;
1361 tree step
, iv_step
, lbound
, off
;
1362 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1364 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1365 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1368 /* If base is a component ref, require that the offset of the reference
1370 if (TREE_CODE (base
) == COMPONENT_REF
)
1372 off
= component_ref_field_offset (base
);
1373 return expr_invariant_in_loop_p (loop
, off
);
1376 /* If base is array, first check whether we will be able to move the
1377 reference out of the loop (in order to take its address in strength
1378 reduction). In order for this to work we need both lower bound
1379 and step to be loop invariants. */
1380 if (TREE_CODE (base
) == ARRAY_REF
)
1382 step
= array_ref_element_size (base
);
1383 lbound
= array_ref_low_bound (base
);
1385 if (!expr_invariant_in_loop_p (loop
, step
)
1386 || !expr_invariant_in_loop_p (loop
, lbound
))
1390 if (TREE_CODE (*idx
) != SSA_NAME
)
1393 iv
= get_iv (dta
->ivopts_data
, *idx
);
1402 if (TREE_CODE (base
) == ARRAY_REF
)
1404 step
= array_ref_element_size (base
);
1406 /* We only handle addresses whose step is an integer constant. */
1407 if (TREE_CODE (step
) != INTEGER_CST
)
1411 /* The step for pointer arithmetics already is 1 byte. */
1412 step
= build_int_cst (sizetype
, 1);
1414 /* FIXME: convert_step should not be used outside chrec_convert: fix
1415 this by calling chrec_convert. */
1416 iv_step
= convert_step (dta
->ivopts_data
->current_loop
,
1417 sizetype
, iv
->base
, iv
->step
, dta
->stmt
);
1421 /* The index might wrap. */
1425 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1428 *dta
->step_p
= step
;
1430 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1435 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1436 object is passed to it in DATA. */
1439 idx_record_use (tree base
, tree
*idx
,
1442 find_interesting_uses_op (data
, *idx
);
1443 if (TREE_CODE (base
) == ARRAY_REF
)
1445 find_interesting_uses_op (data
, array_ref_element_size (base
));
1446 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1451 /* Returns true if memory reference REF may be unaligned. */
1454 may_be_unaligned_p (tree ref
)
1458 HOST_WIDE_INT bitsize
;
1459 HOST_WIDE_INT bitpos
;
1461 enum machine_mode mode
;
1462 int unsignedp
, volatilep
;
1463 unsigned base_align
;
1465 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1466 thus they are not misaligned. */
1467 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1470 /* The test below is basically copy of what expr.c:normal_inner_ref
1471 does to check whether the object must be loaded by parts when
1472 STRICT_ALIGNMENT is true. */
1473 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1474 &unsignedp
, &volatilep
, true);
1475 base_type
= TREE_TYPE (base
);
1476 base_align
= TYPE_ALIGN (base_type
);
1479 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1480 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1481 || bitpos
% BITS_PER_UNIT
!= 0))
1487 /* Finds addresses in *OP_P inside STMT. */
1490 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1492 tree base
= *op_p
, step
= NULL
;
1494 struct ifs_ivopts_data ifs_ivopts_data
;
1496 /* Do not play with volatile memory references. A bit too conservative,
1497 perhaps, but safe. */
1498 if (stmt_ann (stmt
)->has_volatile_ops
)
1501 /* Ignore bitfields for now. Not really something terribly complicated
1503 if (TREE_CODE (base
) == COMPONENT_REF
1504 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1507 if (STRICT_ALIGNMENT
1508 && may_be_unaligned_p (base
))
1511 base
= unshare_expr (base
);
1513 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1515 tree type
= build_pointer_type (TREE_TYPE (base
));
1519 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1521 civ
= get_iv (data
, TMR_BASE (base
));
1525 TMR_BASE (base
) = civ
->base
;
1528 if (TMR_INDEX (base
)
1529 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1531 civ
= get_iv (data
, TMR_INDEX (base
));
1535 TMR_INDEX (base
) = civ
->base
;
1540 if (TMR_STEP (base
))
1541 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1544 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1552 base
= tree_mem_ref_addr (type
, base
);
1556 ifs_ivopts_data
.ivopts_data
= data
;
1557 ifs_ivopts_data
.stmt
= stmt
;
1558 ifs_ivopts_data
.step_p
= &step
;
1559 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1563 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1564 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1566 base
= build_fold_addr_expr (base
);
1569 civ
= alloc_iv (base
, step
);
1570 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1574 for_each_index (op_p
, idx_record_use
, data
);
1577 /* Finds and records invariants used in STMT. */
1580 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1583 use_operand_p use_p
;
1586 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1588 op
= USE_FROM_PTR (use_p
);
1589 record_invariant (data
, op
, false);
1593 /* Finds interesting uses of induction variables in the statement STMT. */
1596 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1601 use_operand_p use_p
;
1603 find_invariants_stmt (data
, stmt
);
1605 if (TREE_CODE (stmt
) == COND_EXPR
)
1607 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1611 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1613 lhs
= TREE_OPERAND (stmt
, 0);
1614 rhs
= TREE_OPERAND (stmt
, 1);
1616 if (TREE_CODE (lhs
) == SSA_NAME
)
1618 /* If the statement defines an induction variable, the uses are not
1619 interesting by themselves. */
1621 iv
= get_iv (data
, lhs
);
1623 if (iv
&& !zero_p (iv
->step
))
1627 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1629 case tcc_comparison
:
1630 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1634 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1635 if (REFERENCE_CLASS_P (lhs
))
1636 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1642 if (REFERENCE_CLASS_P (lhs
)
1643 && is_gimple_val (rhs
))
1645 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1646 find_interesting_uses_op (data
, rhs
);
1650 /* TODO -- we should also handle address uses of type
1652 memory = call (whatever);
1659 if (TREE_CODE (stmt
) == PHI_NODE
1660 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1662 lhs
= PHI_RESULT (stmt
);
1663 iv
= get_iv (data
, lhs
);
1665 if (iv
&& !zero_p (iv
->step
))
1669 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1671 op
= USE_FROM_PTR (use_p
);
1673 if (TREE_CODE (op
) != SSA_NAME
)
1676 iv
= get_iv (data
, op
);
1680 find_interesting_uses_op (data
, op
);
1684 /* Finds interesting uses of induction variables outside of loops
1685 on loop exit edge EXIT. */
1688 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1692 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1694 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1695 find_interesting_uses_op (data
, def
);
1699 /* Finds uses of the induction variables that are interesting. */
1702 find_interesting_uses (struct ivopts_data
*data
)
1705 block_stmt_iterator bsi
;
1707 basic_block
*body
= get_loop_body (data
->current_loop
);
1709 struct version_info
*info
;
1712 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1713 fprintf (dump_file
, "Uses:\n\n");
1715 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1720 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1721 if (e
->dest
!= EXIT_BLOCK_PTR
1722 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1723 find_interesting_uses_outside (data
, e
);
1725 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1726 find_interesting_uses_stmt (data
, phi
);
1727 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1728 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1731 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1735 fprintf (dump_file
, "\n");
1737 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1739 info
= ver_info (data
, i
);
1742 fprintf (dump_file
, " ");
1743 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1744 fprintf (dump_file
, " is invariant (%d)%s\n",
1745 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1749 fprintf (dump_file
, "\n");
1755 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1756 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1757 we are at the top-level of the processed address. */
1760 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1761 unsigned HOST_WIDE_INT
*offset
)
1763 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1764 enum tree_code code
;
1765 tree type
, orig_type
= TREE_TYPE (expr
);
1766 unsigned HOST_WIDE_INT off0
, off1
, st
;
1767 tree orig_expr
= expr
;
1771 type
= TREE_TYPE (expr
);
1772 code
= TREE_CODE (expr
);
1778 if (!cst_and_fits_in_hwi (expr
)
1782 *offset
= int_cst_value (expr
);
1783 return build_int_cst_type (orig_type
, 0);
1787 op0
= TREE_OPERAND (expr
, 0);
1788 op1
= TREE_OPERAND (expr
, 1);
1790 op0
= strip_offset_1 (op0
, false, false, &off0
);
1791 op1
= strip_offset_1 (op1
, false, false, &off1
);
1793 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1794 if (op0
== TREE_OPERAND (expr
, 0)
1795 && op1
== TREE_OPERAND (expr
, 1))
1800 else if (zero_p (op0
))
1802 if (code
== PLUS_EXPR
)
1805 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1808 expr
= fold_build2 (code
, type
, op0
, op1
);
1810 return fold_convert (orig_type
, expr
);
1816 step
= array_ref_element_size (expr
);
1817 if (!cst_and_fits_in_hwi (step
))
1820 st
= int_cst_value (step
);
1821 op1
= TREE_OPERAND (expr
, 1);
1822 op1
= strip_offset_1 (op1
, false, false, &off1
);
1823 *offset
= off1
* st
;
1828 /* Strip the component reference completely. */
1829 op0
= TREE_OPERAND (expr
, 0);
1830 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1840 tmp
= component_ref_field_offset (expr
);
1842 && cst_and_fits_in_hwi (tmp
))
1844 /* Strip the component reference completely. */
1845 op0
= TREE_OPERAND (expr
, 0);
1846 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1847 *offset
= off0
+ int_cst_value (tmp
);
1853 op0
= TREE_OPERAND (expr
, 0);
1854 op0
= strip_offset_1 (op0
, true, true, &off0
);
1857 if (op0
== TREE_OPERAND (expr
, 0))
1860 expr
= build_fold_addr_expr (op0
);
1861 return fold_convert (orig_type
, expr
);
1864 inside_addr
= false;
1871 /* Default handling of expressions for that we want to recurse into
1872 the first operand. */
1873 op0
= TREE_OPERAND (expr
, 0);
1874 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1877 if (op0
== TREE_OPERAND (expr
, 0)
1878 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1881 expr
= copy_node (expr
);
1882 TREE_OPERAND (expr
, 0) = op0
;
1884 TREE_OPERAND (expr
, 1) = op1
;
1886 /* Inside address, we might strip the top level component references,
1887 thus changing type of the expression. Handling of ADDR_EXPR
1889 expr
= fold_convert (orig_type
, expr
);
1894 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1897 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1899 return strip_offset_1 (expr
, false, false, offset
);
1902 /* Returns variant of TYPE that can be used as base for different uses.
1903 For integer types, we return unsigned variant of the type, which
1904 avoids problems with overflows. For pointer types, we return void *. */
1907 generic_type_for (tree type
)
1909 if (POINTER_TYPE_P (type
))
1910 return ptr_type_node
;
1912 if (TYPE_UNSIGNED (type
))
1915 return unsigned_type_for (type
);
1918 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1919 the bitmap to that we should store it. */
1921 static struct ivopts_data
*fd_ivopts_data
;
1923 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1925 bitmap
*depends_on
= data
;
1926 struct version_info
*info
;
1928 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1930 info
= name_info (fd_ivopts_data
, *expr_p
);
1932 if (!info
->inv_id
|| info
->has_nonlin_use
)
1936 *depends_on
= BITMAP_ALLOC (NULL
);
1937 bitmap_set_bit (*depends_on
, info
->inv_id
);
1942 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1943 position to POS. If USE is not NULL, the candidate is set as related to
1944 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1945 replacement of the final value of the iv by a direct computation. */
1947 static struct iv_cand
*
1948 add_candidate_1 (struct ivopts_data
*data
,
1949 tree base
, tree step
, bool important
, enum iv_position pos
,
1950 struct iv_use
*use
, tree incremented_at
)
1953 struct iv_cand
*cand
= NULL
;
1954 tree type
, orig_type
;
1958 orig_type
= TREE_TYPE (base
);
1959 type
= generic_type_for (orig_type
);
1960 if (type
!= orig_type
)
1962 base
= fold_convert (type
, base
);
1964 step
= fold_convert (type
, step
);
1968 for (i
= 0; i
< n_iv_cands (data
); i
++)
1970 cand
= iv_cand (data
, i
);
1972 if (cand
->pos
!= pos
)
1975 if (cand
->incremented_at
!= incremented_at
)
1989 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1992 if (zero_p (cand
->iv
->step
))
1999 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
2004 if (i
== n_iv_cands (data
))
2006 cand
= XCNEW (struct iv_cand
);
2012 cand
->iv
= alloc_iv (base
, step
);
2015 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2017 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2018 cand
->var_after
= cand
->var_before
;
2020 cand
->important
= important
;
2021 cand
->incremented_at
= incremented_at
;
2022 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2025 && TREE_CODE (step
) != INTEGER_CST
)
2027 fd_ivopts_data
= data
;
2028 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2031 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2032 dump_cand (dump_file
, cand
);
2035 if (important
&& !cand
->important
)
2037 cand
->important
= true;
2038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2039 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2044 bitmap_set_bit (use
->related_cands
, i
);
2045 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2046 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2053 /* Returns true if incrementing the induction variable at the end of the LOOP
2056 The purpose is to avoid splitting latch edge with a biv increment, thus
2057 creating a jump, possibly confusing other optimization passes and leaving
2058 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2059 is not available (so we do not have a better alternative), or if the latch
2060 edge is already nonempty. */
2063 allow_ip_end_pos_p (struct loop
*loop
)
2065 if (!ip_normal_pos (loop
))
2068 if (!empty_block_p (ip_end_pos (loop
)))
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. The candidate computation is scheduled on all available positions. */
2079 add_candidate (struct ivopts_data
*data
,
2080 tree base
, tree step
, bool important
, struct iv_use
*use
)
2082 if (ip_normal_pos (data
->current_loop
))
2083 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2084 if (ip_end_pos (data
->current_loop
)
2085 && allow_ip_end_pos_p (data
->current_loop
))
2086 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2089 /* Add a standard "0 + 1 * iteration" iv candidate for a
2090 type with SIZE bits. */
2093 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2096 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2097 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2101 /* Adds standard iv candidates. */
2104 add_standard_iv_candidates (struct ivopts_data
*data
)
2106 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2108 /* The same for a double-integer type if it is still fast enough. */
2109 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2110 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2114 /* Adds candidates bases on the old induction variable IV. */
2117 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2120 struct iv_cand
*cand
;
2122 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2124 /* The same, but with initial value zero. */
2125 add_candidate (data
,
2126 build_int_cst (TREE_TYPE (iv
->base
), 0),
2127 iv
->step
, true, NULL
);
2129 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2130 if (TREE_CODE (phi
) == PHI_NODE
)
2132 /* Additionally record the possibility of leaving the original iv
2134 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2135 cand
= add_candidate_1 (data
,
2136 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2137 SSA_NAME_DEF_STMT (def
));
2138 cand
->var_before
= iv
->ssa_name
;
2139 cand
->var_after
= def
;
2143 /* Adds candidates based on the old induction variables. */
2146 add_old_ivs_candidates (struct ivopts_data
*data
)
2152 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2154 iv
= ver_info (data
, i
)->iv
;
2155 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2156 add_old_iv_candidates (data
, iv
);
2160 /* Adds candidates based on the value of the induction variable IV and USE. */
2163 add_iv_value_candidates (struct ivopts_data
*data
,
2164 struct iv
*iv
, struct iv_use
*use
)
2166 unsigned HOST_WIDE_INT offset
;
2169 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2171 /* The same, but with initial value zero. Make such variable important,
2172 since it is generic enough so that possibly many uses may be based
2174 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2175 iv
->step
, true, use
);
2177 /* Third, try removing the constant offset. */
2178 base
= strip_offset (iv
->base
, &offset
);
2180 add_candidate (data
, base
, iv
->step
, false, use
);
2183 /* Adds candidates based on the uses. */
2186 add_derived_ivs_candidates (struct ivopts_data
*data
)
2190 for (i
= 0; i
< n_iv_uses (data
); i
++)
2192 struct iv_use
*use
= iv_use (data
, i
);
2199 case USE_NONLINEAR_EXPR
:
2202 /* Just add the ivs based on the value of the iv used here. */
2203 add_iv_value_candidates (data
, use
->iv
, use
);
2212 /* Record important candidates and add them to related_cands bitmaps
2216 record_important_candidates (struct ivopts_data
*data
)
2221 for (i
= 0; i
< n_iv_cands (data
); i
++)
2223 struct iv_cand
*cand
= iv_cand (data
, i
);
2225 if (cand
->important
)
2226 bitmap_set_bit (data
->important_candidates
, i
);
2229 data
->consider_all_candidates
= (n_iv_cands (data
)
2230 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2232 if (data
->consider_all_candidates
)
2234 /* We will not need "related_cands" bitmaps in this case,
2235 so release them to decrease peak memory consumption. */
2236 for (i
= 0; i
< n_iv_uses (data
); i
++)
2238 use
= iv_use (data
, i
);
2239 BITMAP_FREE (use
->related_cands
);
2244 /* Add important candidates to the related_cands bitmaps. */
2245 for (i
= 0; i
< n_iv_uses (data
); i
++)
2246 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2247 data
->important_candidates
);
2251 /* Finds the candidates for the induction variables. */
2254 find_iv_candidates (struct ivopts_data
*data
)
2256 /* Add commonly used ivs. */
2257 add_standard_iv_candidates (data
);
2259 /* Add old induction variables. */
2260 add_old_ivs_candidates (data
);
2262 /* Add induction variables derived from uses. */
2263 add_derived_ivs_candidates (data
);
2265 /* Record the important candidates. */
2266 record_important_candidates (data
);
2269 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2270 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2271 we allocate a simple list to every use. */
2274 alloc_use_cost_map (struct ivopts_data
*data
)
2276 unsigned i
, size
, s
, j
;
2278 for (i
= 0; i
< n_iv_uses (data
); i
++)
2280 struct iv_use
*use
= iv_use (data
, i
);
2283 if (data
->consider_all_candidates
)
2284 size
= n_iv_cands (data
);
2288 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2293 /* Round up to the power of two, so that moduling by it is fast. */
2294 for (size
= 1; size
< s
; size
<<= 1)
2298 use
->n_map_members
= size
;
2299 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2303 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2304 on invariants DEPENDS_ON and that the value used in expressing it
2308 set_use_iv_cost (struct ivopts_data
*data
,
2309 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2310 bitmap depends_on
, tree value
)
2316 BITMAP_FREE (depends_on
);
2320 if (data
->consider_all_candidates
)
2322 use
->cost_map
[cand
->id
].cand
= cand
;
2323 use
->cost_map
[cand
->id
].cost
= cost
;
2324 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2325 use
->cost_map
[cand
->id
].value
= value
;
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s
= cand
->id
& (use
->n_map_members
- 1);
2331 for (i
= s
; i
< use
->n_map_members
; i
++)
2332 if (!use
->cost_map
[i
].cand
)
2334 for (i
= 0; i
< s
; i
++)
2335 if (!use
->cost_map
[i
].cand
)
2341 use
->cost_map
[i
].cand
= cand
;
2342 use
->cost_map
[i
].cost
= cost
;
2343 use
->cost_map
[i
].depends_on
= depends_on
;
2344 use
->cost_map
[i
].value
= value
;
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2349 static struct cost_pair
*
2350 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2351 struct iv_cand
*cand
)
2354 struct cost_pair
*ret
;
2359 if (data
->consider_all_candidates
)
2361 ret
= use
->cost_map
+ cand
->id
;
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s
= cand
->id
& (use
->n_map_members
- 1);
2370 for (i
= s
; i
< use
->n_map_members
; i
++)
2371 if (use
->cost_map
[i
].cand
== cand
)
2372 return use
->cost_map
+ i
;
2374 for (i
= 0; i
< s
; i
++)
2375 if (use
->cost_map
[i
].cand
== cand
)
2376 return use
->cost_map
+ i
;
2381 /* Returns estimate on cost of computing SEQ. */
2389 for (; seq
; seq
= NEXT_INSN (seq
))
2391 set
= single_set (seq
);
2393 cost
+= rtx_cost (set
, SET
);
2401 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2403 produce_memory_decl_rtl (tree obj
, int *regno
)
2408 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2410 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2411 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2414 x
= gen_raw_REG (Pmode
, (*regno
)++);
2416 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2419 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2420 walk_tree. DATA contains the actual fake register number. */
2423 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2425 tree obj
= NULL_TREE
;
2429 switch (TREE_CODE (*expr_p
))
2432 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2433 handled_component_p (*expr_p
);
2434 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2438 x
= produce_memory_decl_rtl (obj
, regno
);
2443 obj
= SSA_NAME_VAR (*expr_p
);
2444 if (!DECL_RTL_SET_P (obj
))
2445 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2454 if (DECL_RTL_SET_P (obj
))
2457 if (DECL_MODE (obj
) == BLKmode
)
2458 x
= produce_memory_decl_rtl (obj
, regno
);
2460 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2470 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2471 SET_DECL_RTL (obj
, x
);
2477 /* Determines cost of the computation of EXPR. */
2480 computation_cost (tree expr
)
2483 tree type
= TREE_TYPE (expr
);
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2488 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2490 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2494 cost
= seq_cost (seq
);
2496 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2504 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2506 if (stmt_after_increment (loop
, cand
, stmt
))
2507 return cand
->var_after
;
2509 return cand
->var_before
;
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2516 tree_int_cst_sign_bit (tree t
)
2518 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2519 unsigned HOST_WIDE_INT w
;
2521 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2522 w
= TREE_INT_CST_LOW (t
);
2525 w
= TREE_INT_CST_HIGH (t
);
2526 bitno
-= HOST_BITS_PER_WIDE_INT
;
2529 return (w
>> bitno
) & 1;
2532 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2533 return cst. Otherwise return NULL_TREE. */
2536 constant_multiple_of (tree type
, tree top
, tree bot
)
2538 tree res
, mby
, p0
, p1
;
2539 enum tree_code code
;
2545 if (operand_equal_p (top
, bot
, 0))
2546 return build_int_cst (type
, 1);
2548 code
= TREE_CODE (top
);
2552 mby
= TREE_OPERAND (top
, 1);
2553 if (TREE_CODE (mby
) != INTEGER_CST
)
2556 res
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2560 return fold_binary_to_constant (MULT_EXPR
, type
, res
,
2561 fold_convert (type
, mby
));
2565 p0
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2568 p1
= constant_multiple_of (type
, TREE_OPERAND (top
, 1), bot
);
2572 return fold_binary_to_constant (code
, type
, p0
, p1
);
2575 if (TREE_CODE (bot
) != INTEGER_CST
)
2578 bot
= fold_convert (type
, bot
);
2579 top
= fold_convert (type
, top
);
2581 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2582 the result afterwards. */
2583 if (tree_int_cst_sign_bit (bot
))
2586 bot
= fold_unary_to_constant (NEGATE_EXPR
, type
, bot
);
2591 /* Ditto for TOP. */
2592 if (tree_int_cst_sign_bit (top
))
2595 top
= fold_unary_to_constant (NEGATE_EXPR
, type
, top
);
2598 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR
, type
, top
, bot
)))
2601 res
= fold_binary_to_constant (EXACT_DIV_EXPR
, type
, top
, bot
);
2603 res
= fold_unary_to_constant (NEGATE_EXPR
, type
, res
);
2611 /* Sets COMB to CST. */
2614 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2615 unsigned HOST_WIDE_INT cst
)
2617 unsigned prec
= TYPE_PRECISION (type
);
2620 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2623 comb
->rest
= NULL_TREE
;
2624 comb
->offset
= cst
& comb
->mask
;
2627 /* Sets COMB to single element ELT. */
2630 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2632 unsigned prec
= TYPE_PRECISION (type
);
2635 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2638 comb
->elts
[0] = elt
;
2640 comb
->rest
= NULL_TREE
;
2644 /* Scales COMB by SCALE. */
2647 aff_combination_scale (struct affine_tree_combination
*comb
,
2648 unsigned HOST_WIDE_INT scale
)
2657 aff_combination_const (comb
, comb
->type
, 0);
2661 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2662 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2664 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2665 comb
->elts
[j
] = comb
->elts
[i
];
2666 if (comb
->coefs
[j
] != 0)
2673 if (comb
->n
< MAX_AFF_ELTS
)
2675 comb
->coefs
[comb
->n
] = scale
;
2676 comb
->elts
[comb
->n
] = comb
->rest
;
2677 comb
->rest
= NULL_TREE
;
2681 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2682 build_int_cst_type (comb
->type
, scale
));
2686 /* Adds ELT * SCALE to COMB. */
2689 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2690 unsigned HOST_WIDE_INT scale
)
2697 for (i
= 0; i
< comb
->n
; i
++)
2698 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2700 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2705 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2706 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2710 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2711 comb
->coefs
[comb
->n
] = 1;
2712 comb
->elts
[comb
->n
] = comb
->rest
;
2713 comb
->rest
= NULL_TREE
;
2718 if (comb
->n
< MAX_AFF_ELTS
)
2720 comb
->coefs
[comb
->n
] = scale
;
2721 comb
->elts
[comb
->n
] = elt
;
2727 elt
= fold_convert (comb
->type
, elt
);
2729 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2730 fold_convert (comb
->type
, elt
),
2731 build_int_cst_type (comb
->type
, scale
));
2734 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2739 /* Adds COMB2 to COMB1. */
2742 aff_combination_add (struct affine_tree_combination
*comb1
,
2743 struct affine_tree_combination
*comb2
)
2747 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2748 for (i
= 0; i
< comb2
->n
; i
++)
2749 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2751 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2754 /* Splits EXPR into an affine combination of parts. */
2757 tree_to_aff_combination (tree expr
, tree type
,
2758 struct affine_tree_combination
*comb
)
2760 struct affine_tree_combination tmp
;
2761 enum tree_code code
;
2762 tree cst
, core
, toffset
;
2763 HOST_WIDE_INT bitpos
, bitsize
;
2764 enum machine_mode mode
;
2765 int unsignedp
, volatilep
;
2769 code
= TREE_CODE (expr
);
2773 aff_combination_const (comb
, type
, int_cst_value (expr
));
2778 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2779 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2780 if (code
== MINUS_EXPR
)
2781 aff_combination_scale (&tmp
, -1);
2782 aff_combination_add (comb
, &tmp
);
2786 cst
= TREE_OPERAND (expr
, 1);
2787 if (TREE_CODE (cst
) != INTEGER_CST
)
2789 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2790 aff_combination_scale (comb
, int_cst_value (cst
));
2794 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2795 aff_combination_scale (comb
, -1);
2799 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2800 &toffset
, &mode
, &unsignedp
, &volatilep
,
2802 if (bitpos
% BITS_PER_UNIT
!= 0)
2804 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2805 core
= build_fold_addr_expr (core
);
2806 if (TREE_CODE (core
) == ADDR_EXPR
)
2807 aff_combination_add_elt (comb
, core
, 1);
2810 tree_to_aff_combination (core
, type
, &tmp
);
2811 aff_combination_add (comb
, &tmp
);
2815 tree_to_aff_combination (toffset
, type
, &tmp
);
2816 aff_combination_add (comb
, &tmp
);
2824 aff_combination_elt (comb
, type
, expr
);
2827 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2830 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2831 unsigned HOST_WIDE_INT mask
)
2833 enum tree_code code
;
2836 elt
= fold_convert (type
, elt
);
2843 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2849 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2851 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2855 return fold_build2 (MULT_EXPR
, type
, elt
,
2856 build_int_cst_type (type
, scale
));
2858 if ((scale
| (mask
>> 1)) == mask
)
2860 /* Scale is negative. */
2862 scale
= (-scale
) & mask
;
2867 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2868 build_int_cst_type (type
, scale
));
2869 return fold_build2 (code
, type
, expr
, elt
);
2872 /* Copies the tree elements of COMB to ensure that they are not shared. */
2875 unshare_aff_combination (struct affine_tree_combination
*comb
)
2879 for (i
= 0; i
< comb
->n
; i
++)
2880 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2882 comb
->rest
= unshare_expr (comb
->rest
);
2885 /* Makes tree from the affine combination COMB. */
2888 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2890 tree type
= comb
->type
;
2891 tree expr
= comb
->rest
;
2893 unsigned HOST_WIDE_INT off
, sgn
;
2895 /* Handle the special case produced by get_computation_aff when
2896 the type does not fit in HOST_WIDE_INT. */
2897 if (comb
->n
== 0 && comb
->offset
== 0)
2898 return fold_convert (type
, expr
);
2900 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2902 for (i
= 0; i
< comb
->n
; i
++)
2903 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2906 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2908 /* Offset is negative. */
2909 off
= (-comb
->offset
) & comb
->mask
;
2917 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2921 /* Determines the expression by that USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in a decomposed
2923 form into AFF. Returns false if USE cannot be expressed using CAND. */
2926 get_computation_aff (struct loop
*loop
,
2927 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2928 struct affine_tree_combination
*aff
)
2930 tree ubase
= use
->iv
->base
;
2931 tree ustep
= use
->iv
->step
;
2932 tree cbase
= cand
->iv
->base
;
2933 tree cstep
= cand
->iv
->step
;
2934 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2938 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2939 HOST_WIDE_INT ratioi
;
2940 struct affine_tree_combination cbase_aff
, expr_aff
;
2941 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
2943 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2945 /* We do not have a precision to express the values of use. */
2949 expr
= var_at_stmt (loop
, cand
, at
);
2951 if (TREE_TYPE (expr
) != ctype
)
2953 /* This may happen with the original ivs. */
2954 expr
= fold_convert (ctype
, expr
);
2957 if (TYPE_UNSIGNED (utype
))
2961 uutype
= unsigned_type_for (utype
);
2962 ubase
= fold_convert (uutype
, ubase
);
2963 ustep
= fold_convert (uutype
, ustep
);
2966 if (uutype
!= ctype
)
2968 expr
= fold_convert (uutype
, expr
);
2969 cbase
= fold_convert (uutype
, cbase
);
2970 cstep
= fold_convert (uutype
, cstep
);
2972 /* If the conversion is not noop, we must take it into account when
2973 considering the value of the step. */
2974 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2978 if (cst_and_fits_in_hwi (cstep_orig
)
2979 && cst_and_fits_in_hwi (ustep_orig
))
2981 ustepi
= int_cst_value (ustep_orig
);
2982 cstepi
= int_cst_value (cstep_orig
);
2984 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2986 /* TODO maybe consider case when ustep divides cstep and the ratio is
2987 a power of 2 (so that the division is fast to execute)? We would
2988 need to be much more careful with overflows etc. then. */
2992 ratio
= build_int_cst_type (uutype
, ratioi
);
2996 ratio
= constant_multiple_of (uutype
, ustep_orig
, cstep_orig
);
3000 /* Ratioi is only used to detect special cases when the multiplicative
3001 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3002 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3003 to integer_onep/integer_all_onesp, since the former ignores
3005 if (cst_and_fits_in_hwi (ratio
))
3006 ratioi
= int_cst_value (ratio
);
3007 else if (integer_onep (ratio
))
3009 else if (integer_all_onesp (ratio
))
3015 /* We may need to shift the value if we are after the increment. */
3016 if (stmt_after_increment (loop
, cand
, at
))
3017 cbase
= fold_build2 (PLUS_EXPR
, uutype
, cbase
, cstep
);
3019 /* use = ubase - ratio * cbase + ratio * var.
3021 In general case ubase + ratio * (var - cbase) could be better (one less
3022 multiplication), but often it is possible to eliminate redundant parts
3023 of computations from (ubase - ratio * cbase) term, and if it does not
3024 happen, fold is able to apply the distributive law to obtain this form
3027 if (TYPE_PRECISION (uutype
) > HOST_BITS_PER_WIDE_INT
)
3029 /* Let's compute in trees and just return the result in AFF. This case
3030 should not be very common, and fold itself is not that bad either,
3031 so making the aff. functions more complicated to handle this case
3032 is not that urgent. */
3035 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, cbase
);
3036 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3038 else if (ratioi
== -1)
3040 delta
= fold_build2 (PLUS_EXPR
, uutype
, ubase
, cbase
);
3041 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3045 delta
= fold_build2 (MULT_EXPR
, uutype
, cbase
, ratio
);
3046 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, delta
);
3047 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3048 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3059 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3060 possible to compute ratioi. */
3061 gcc_assert (ratioi
);
3063 tree_to_aff_combination (ubase
, uutype
, aff
);
3064 tree_to_aff_combination (cbase
, uutype
, &cbase_aff
);
3065 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3066 aff_combination_scale (&cbase_aff
, -ratioi
);
3067 aff_combination_scale (&expr_aff
, ratioi
);
3068 aff_combination_add (aff
, &cbase_aff
);
3069 aff_combination_add (aff
, &expr_aff
);
3074 /* Determines the expression by that USE is expressed from induction variable
3075 CAND at statement AT in LOOP. The computation is unshared. */
3078 get_computation_at (struct loop
*loop
,
3079 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3081 struct affine_tree_combination aff
;
3082 tree type
= TREE_TYPE (use
->iv
->base
);
3084 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3086 unshare_aff_combination (&aff
);
3087 return fold_convert (type
, aff_combination_to_tree (&aff
));
3090 /* Determines the expression by that USE is expressed from induction variable
3091 CAND in LOOP. The computation is unshared. */
3094 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3096 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3099 /* Returns cost of addition in MODE. */
3102 add_cost (enum machine_mode mode
)
3104 static unsigned costs
[NUM_MACHINE_MODES
];
3112 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3113 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3114 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3119 cost
= seq_cost (seq
);
3125 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3126 fprintf (dump_file
, "Addition in %s costs %d\n",
3127 GET_MODE_NAME (mode
), cost
);
3131 /* Entry in a hashtable of already known costs for multiplication. */
3134 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3135 enum machine_mode mode
; /* In mode. */
3136 unsigned cost
; /* The cost. */
3139 /* Counts hash value for the ENTRY. */
3142 mbc_entry_hash (const void *entry
)
3144 const struct mbc_entry
*e
= entry
;
3146 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3149 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3152 mbc_entry_eq (const void *entry1
, const void *entry2
)
3154 const struct mbc_entry
*e1
= entry1
;
3155 const struct mbc_entry
*e2
= entry2
;
3157 return (e1
->mode
== e2
->mode
3158 && e1
->cst
== e2
->cst
);
3161 /* Returns cost of multiplication by constant CST in MODE. */
3164 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3166 static htab_t costs
;
3167 struct mbc_entry
**cached
, act
;
3172 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3176 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3178 return (*cached
)->cost
;
3180 *cached
= XNEW (struct mbc_entry
);
3181 (*cached
)->mode
= mode
;
3182 (*cached
)->cst
= cst
;
3185 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3186 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3190 cost
= seq_cost (seq
);
3192 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3193 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3194 (int) cst
, GET_MODE_NAME (mode
), cost
);
3196 (*cached
)->cost
= cost
;
3201 /* Returns true if multiplying by RATIO is allowed in address. */
3204 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3206 #define MAX_RATIO 128
3207 static sbitmap valid_mult
;
3211 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3215 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3216 sbitmap_zero (valid_mult
);
3217 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3218 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3220 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3221 if (memory_address_p (Pmode
, addr
))
3222 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3225 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3227 fprintf (dump_file
, " allowed multipliers:");
3228 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3229 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3230 fprintf (dump_file
, " %d", (int) i
);
3231 fprintf (dump_file
, "\n");
3232 fprintf (dump_file
, "\n");
3236 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3239 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3242 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3243 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3244 variable is omitted. The created memory accesses MODE.
3246 TODO -- there must be some better way. This all is quite crude. */
3249 get_address_cost (bool symbol_present
, bool var_present
,
3250 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3252 static bool initialized
= false;
3253 static HOST_WIDE_INT rat
, off
;
3254 static HOST_WIDE_INT min_offset
, max_offset
;
3255 static unsigned costs
[2][2][2][2];
3256 unsigned cost
, acost
;
3257 rtx seq
, addr
, base
;
3258 bool offset_p
, ratio_p
;
3260 HOST_WIDE_INT s_offset
;
3261 unsigned HOST_WIDE_INT mask
;
3269 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3271 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3272 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3274 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3275 if (!memory_address_p (Pmode
, addr
))
3278 max_offset
= i
>> 1;
3281 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3283 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3284 if (!memory_address_p (Pmode
, addr
))
3287 min_offset
= -(i
>> 1);
3289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3291 fprintf (dump_file
, "get_address_cost:\n");
3292 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3293 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
3297 for (i
= 2; i
<= MAX_RATIO
; i
++)
3298 if (multiplier_allowed_in_address_p (i
))
3305 bits
= GET_MODE_BITSIZE (Pmode
);
3306 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3308 if ((offset
>> (bits
- 1) & 1))
3313 offset_p
= (s_offset
!= 0
3314 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3315 ratio_p
= (ratio
!= 1
3316 && multiplier_allowed_in_address_p (ratio
));
3318 if (ratio
!= 1 && !ratio_p
)
3319 cost
+= multiply_by_cost (ratio
, Pmode
);
3321 if (s_offset
&& !offset_p
&& !symbol_present
)
3323 cost
+= add_cost (Pmode
);
3327 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3330 int old_cse_not_expected
;
3333 addr
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3334 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3336 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3339 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3343 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3345 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3346 gen_rtx_fmt_ee (PLUS
, Pmode
,
3348 gen_int_mode (off
, Pmode
)));
3351 base
= gen_int_mode (off
, Pmode
);
3356 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3359 /* To avoid splitting addressing modes, pretend that no cse will
3361 old_cse_not_expected
= cse_not_expected
;
3362 cse_not_expected
= true;
3363 addr
= memory_address (Pmode
, addr
);
3364 cse_not_expected
= old_cse_not_expected
;
3368 acost
= seq_cost (seq
);
3369 acost
+= address_cost (addr
, Pmode
);
3373 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
3376 return cost
+ acost
;
3379 /* Estimates cost of forcing expression EXPR into a variable. */
3382 force_expr_to_var_cost (tree expr
)
3384 static bool costs_initialized
= false;
3385 static unsigned integer_cost
;
3386 static unsigned symbol_cost
;
3387 static unsigned address_cost
;
3389 unsigned cost0
, cost1
, cost
;
3390 enum machine_mode mode
;
3392 if (!costs_initialized
)
3394 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3395 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3396 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3398 tree type
= build_pointer_type (integer_type_node
);
3400 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
3403 SET_DECL_RTL (var
, x
);
3404 TREE_STATIC (var
) = 1;
3405 addr
= build1 (ADDR_EXPR
, type
, var
);
3406 symbol_cost
= computation_cost (addr
) + 1;
3409 = computation_cost (build2 (PLUS_EXPR
, type
,
3411 build_int_cst_type (type
, 2000))) + 1;
3412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3414 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3415 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3416 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3417 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3418 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3419 fprintf (dump_file
, "\n");
3422 costs_initialized
= true;
3427 if (SSA_VAR_P (expr
))
3430 if (TREE_INVARIANT (expr
))
3432 if (TREE_CODE (expr
) == INTEGER_CST
)
3433 return integer_cost
;
3435 if (TREE_CODE (expr
) == ADDR_EXPR
)
3437 tree obj
= TREE_OPERAND (expr
, 0);
3439 if (TREE_CODE (obj
) == VAR_DECL
3440 || TREE_CODE (obj
) == PARM_DECL
3441 || TREE_CODE (obj
) == RESULT_DECL
)
3445 return address_cost
;
3448 switch (TREE_CODE (expr
))
3453 op0
= TREE_OPERAND (expr
, 0);
3454 op1
= TREE_OPERAND (expr
, 1);
3458 if (is_gimple_val (op0
))
3461 cost0
= force_expr_to_var_cost (op0
);
3463 if (is_gimple_val (op1
))
3466 cost1
= force_expr_to_var_cost (op1
);
3471 /* Just an arbitrary value, FIXME. */
3472 return target_spill_cost
;
3475 mode
= TYPE_MODE (TREE_TYPE (expr
));
3476 switch (TREE_CODE (expr
))
3480 cost
= add_cost (mode
);
3484 if (cst_and_fits_in_hwi (op0
))
3485 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3486 else if (cst_and_fits_in_hwi (op1
))
3487 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3489 return target_spill_cost
;
3499 /* Bound the cost by target_spill_cost. The parts of complicated
3500 computations often are either loop invariant or at least can
3501 be shared between several iv uses, so letting this grow without
3502 limits would not give reasonable results. */
3503 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3506 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3507 invariants the computation depends on. */
3510 force_var_cost (struct ivopts_data
*data
,
3511 tree expr
, bitmap
*depends_on
)
3515 fd_ivopts_data
= data
;
3516 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3519 return force_expr_to_var_cost (expr
);
3522 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3523 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3524 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3525 invariants the computation depends on. */
3528 split_address_cost (struct ivopts_data
*data
,
3529 tree addr
, bool *symbol_present
, bool *var_present
,
3530 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3533 HOST_WIDE_INT bitsize
;
3534 HOST_WIDE_INT bitpos
;
3536 enum machine_mode mode
;
3537 int unsignedp
, volatilep
;
3539 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3540 &unsignedp
, &volatilep
, false);
3543 || bitpos
% BITS_PER_UNIT
!= 0
3544 || TREE_CODE (core
) != VAR_DECL
)
3546 *symbol_present
= false;
3547 *var_present
= true;
3548 fd_ivopts_data
= data
;
3549 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3550 return target_spill_cost
;
3553 *offset
+= bitpos
/ BITS_PER_UNIT
;
3554 if (TREE_STATIC (core
)
3555 || DECL_EXTERNAL (core
))
3557 *symbol_present
= true;
3558 *var_present
= false;
3562 *symbol_present
= false;
3563 *var_present
= true;
3567 /* Estimates cost of expressing difference of addresses E1 - E2 as
3568 var + symbol + offset. The value of offset is added to OFFSET,
3569 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3570 part is missing. DEPENDS_ON is a set of the invariants the computation
3574 ptr_difference_cost (struct ivopts_data
*data
,
3575 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3576 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3578 HOST_WIDE_INT diff
= 0;
3581 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3583 if (ptr_difference_const (e1
, e2
, &diff
))
3586 *symbol_present
= false;
3587 *var_present
= false;
3591 if (e2
== integer_zero_node
)
3592 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3593 symbol_present
, var_present
, offset
, depends_on
);
3595 *symbol_present
= false;
3596 *var_present
= true;
3598 cost
= force_var_cost (data
, e1
, depends_on
);
3599 cost
+= force_var_cost (data
, e2
, depends_on
);
3600 cost
+= add_cost (Pmode
);
3605 /* Estimates cost of expressing difference E1 - E2 as
3606 var + symbol + offset. The value of offset is added to OFFSET,
3607 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3608 part is missing. DEPENDS_ON is a set of the invariants the computation
3612 difference_cost (struct ivopts_data
*data
,
3613 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3614 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3617 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3618 unsigned HOST_WIDE_INT off1
, off2
;
3620 e1
= strip_offset (e1
, &off1
);
3621 e2
= strip_offset (e2
, &off2
);
3622 *offset
+= off1
- off2
;
3627 if (TREE_CODE (e1
) == ADDR_EXPR
)
3628 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3630 *symbol_present
= false;
3632 if (operand_equal_p (e1
, e2
, 0))
3634 *var_present
= false;
3637 *var_present
= true;
3639 return force_var_cost (data
, e1
, depends_on
);
3643 cost
= force_var_cost (data
, e2
, depends_on
);
3644 cost
+= multiply_by_cost (-1, mode
);
3649 cost
= force_var_cost (data
, e1
, depends_on
);
3650 cost
+= force_var_cost (data
, e2
, depends_on
);
3651 cost
+= add_cost (mode
);
3656 /* Determines the cost of the computation by that USE is expressed
3657 from induction variable CAND. If ADDRESS_P is true, we just need
3658 to create an address from it, otherwise we want to get it into
3659 register. A set of invariants we depend on is stored in
3660 DEPENDS_ON. AT is the statement at that the value is computed. */
3663 get_computation_cost_at (struct ivopts_data
*data
,
3664 struct iv_use
*use
, struct iv_cand
*cand
,
3665 bool address_p
, bitmap
*depends_on
, tree at
)
3667 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3669 tree utype
= TREE_TYPE (ubase
), ctype
;
3670 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3671 HOST_WIDE_INT ratio
, aratio
;
3672 bool var_present
, symbol_present
;
3673 unsigned cost
= 0, n_sums
;
3677 /* Only consider real candidates. */
3681 cbase
= cand
->iv
->base
;
3682 cstep
= cand
->iv
->step
;
3683 ctype
= TREE_TYPE (cbase
);
3685 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3687 /* We do not have a precision to express the values of use. */
3693 /* Do not try to express address of an object with computation based
3694 on address of a different object. This may cause problems in rtl
3695 level alias analysis (that does not expect this to be happening,
3696 as this is illegal in C), and would be unlikely to be useful
3698 if (use
->iv
->base_object
3699 && cand
->iv
->base_object
3700 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3704 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3706 /* TODO -- add direct handling of this case. */
3710 /* CSTEPI is removed from the offset in case statement is after the
3711 increment. If the step is not constant, we use zero instead.
3712 This is a bit imprecise (there is the extra addition), but
3713 redundancy elimination is likely to transform the code so that
3714 it uses value of the variable before increment anyway,
3715 so it is not that much unrealistic. */
3716 if (cst_and_fits_in_hwi (cstep
))
3717 cstepi
= int_cst_value (cstep
);
3721 if (cst_and_fits_in_hwi (ustep
)
3722 && cst_and_fits_in_hwi (cstep
))
3724 ustepi
= int_cst_value (ustep
);
3726 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3733 rat
= constant_multiple_of (utype
, ustep
, cstep
);
3738 if (cst_and_fits_in_hwi (rat
))
3739 ratio
= int_cst_value (rat
);
3740 else if (integer_onep (rat
))
3742 else if (integer_all_onesp (rat
))
3748 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3749 or ratio == 1, it is better to handle this like
3751 ubase - ratio * cbase + ratio * var
3753 (also holds in the case ratio == -1, TODO. */
3755 if (cst_and_fits_in_hwi (cbase
))
3757 offset
= - ratio
* int_cst_value (cbase
);
3758 cost
+= difference_cost (data
,
3759 ubase
, integer_zero_node
,
3760 &symbol_present
, &var_present
, &offset
,
3763 else if (ratio
== 1)
3765 cost
+= difference_cost (data
,
3767 &symbol_present
, &var_present
, &offset
,
3772 cost
+= force_var_cost (data
, cbase
, depends_on
);
3773 cost
+= add_cost (TYPE_MODE (ctype
));
3774 cost
+= difference_cost (data
,
3775 ubase
, integer_zero_node
,
3776 &symbol_present
, &var_present
, &offset
,
3780 /* If we are after the increment, the value of the candidate is higher by
3782 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3783 offset
-= ratio
* cstepi
;
3785 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3786 (symbol/var/const parts may be omitted). If we are looking for an address,
3787 find the cost of addressing this. */
3789 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3791 /* Otherwise estimate the costs for computing the expression. */
3792 aratio
= ratio
> 0 ? ratio
: -ratio
;
3793 if (!symbol_present
&& !var_present
&& !offset
)
3796 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3802 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3806 /* Symbol + offset should be compile-time computable. */
3807 && (symbol_present
|| offset
))
3810 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3814 /* Just get the expression, expand it and measure the cost. */
3815 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3821 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3823 return computation_cost (comp
);
3827 /* Determines the cost of the computation by that USE is expressed
3828 from induction variable CAND. If ADDRESS_P is true, we just need
3829 to create an address from it, otherwise we want to get it into
3830 register. A set of invariants we depend on is stored in
3834 get_computation_cost (struct ivopts_data
*data
,
3835 struct iv_use
*use
, struct iv_cand
*cand
,
3836 bool address_p
, bitmap
*depends_on
)
3838 return get_computation_cost_at (data
,
3839 use
, cand
, address_p
, depends_on
, use
->stmt
);
3842 /* Determines cost of basing replacement of USE on CAND in a generic
3846 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3847 struct iv_use
*use
, struct iv_cand
*cand
)
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3856 if (cand
->pos
== IP_ORIGINAL
3857 && cand
->incremented_at
== use
->stmt
)
3859 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3863 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3864 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3866 return cost
!= INFTY
;
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
3872 determine_use_iv_cost_address (struct ivopts_data
*data
,
3873 struct iv_use
*use
, struct iv_cand
*cand
)
3876 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3878 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3880 return cost
!= INFTY
;
3883 /* Computes value of induction variable IV in iteration NITER. */
3886 iv_value (struct iv
*iv
, tree niter
)
3889 tree type
= TREE_TYPE (iv
->base
);
3891 niter
= fold_convert (type
, niter
);
3892 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
3894 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
3897 /* Computes value of candidate CAND at position AT in iteration NITER. */
3900 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3902 tree val
= iv_value (cand
->iv
, niter
);
3903 tree type
= TREE_TYPE (cand
->iv
->base
);
3905 if (stmt_after_increment (loop
, cand
, at
))
3906 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
3911 /* Returns period of induction variable iv. */
3914 iv_period (struct iv
*iv
)
3916 tree step
= iv
->step
, period
, type
;
3919 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3921 /* Period of the iv is gcd (step, type range). Since type range is power
3922 of two, it suffices to determine the maximum power of two that divides
3924 pow2div
= num_ending_zeros (step
);
3925 type
= unsigned_type_for (TREE_TYPE (step
));
3927 period
= build_low_bits_mask (type
,
3928 (TYPE_PRECISION (type
)
3929 - tree_low_cst (pow2div
, 1)));
3934 /* Returns the comparison operator used when eliminating the iv USE. */
3936 static enum tree_code
3937 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3939 struct loop
*loop
= data
->current_loop
;
3943 ex_bb
= bb_for_stmt (use
->stmt
);
3944 exit
= EDGE_SUCC (ex_bb
, 0);
3945 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3946 exit
= EDGE_SUCC (ex_bb
, 1);
3948 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3951 /* Check whether it is possible to express the condition in USE by comparison
3952 of candidate CAND. If so, store the value compared with to BOUND. */
3955 may_eliminate_iv (struct ivopts_data
*data
,
3956 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3960 struct tree_niter_desc
*niter
;
3962 tree wider_type
, period
, per_type
;
3963 struct loop
*loop
= data
->current_loop
;
3965 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3968 /* For now works only for exits that dominate the loop latch. TODO -- extend
3969 for other conditions inside loop body. */
3970 ex_bb
= bb_for_stmt (use
->stmt
);
3971 if (use
->stmt
!= last_stmt (ex_bb
)
3972 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3974 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3977 exit
= EDGE_SUCC (ex_bb
, 0);
3978 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3979 exit
= EDGE_SUCC (ex_bb
, 1);
3980 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3983 niter
= niter_for_exit (data
, exit
);
3985 || !zero_p (niter
->may_be_zero
))
3989 nit_type
= TREE_TYPE (nit
);
3991 /* Determine whether we may use the variable to test whether niter iterations
3992 elapsed. This is the case iff the period of the induction variable is
3993 greater than the number of iterations. */
3994 period
= iv_period (cand
->iv
);
3997 per_type
= TREE_TYPE (period
);
3999 wider_type
= TREE_TYPE (period
);
4000 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
4001 wider_type
= per_type
;
4003 wider_type
= nit_type
;
4005 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
4006 fold_convert (wider_type
, period
),
4007 fold_convert (wider_type
, nit
))))
4010 *bound
= cand_value_at (loop
, cand
, use
->stmt
, nit
);
4014 /* Determines cost of basing replacement of USE on CAND in a condition. */
4017 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4018 struct iv_use
*use
, struct iv_cand
*cand
)
4020 tree bound
= NULL_TREE
, op
, cond
;
4021 bitmap depends_on
= NULL
;
4024 /* Only consider real candidates. */
4027 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4031 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4033 cost
= force_var_cost (data
, bound
, &depends_on
);
4035 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4036 return cost
!= INFTY
;
4039 /* The induction variable elimination failed; just express the original
4040 giv. If it is compared with an invariant, note that we cannot get
4042 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4045 if (TREE_CODE (cond
) != SSA_NAME
)
4047 op
= TREE_OPERAND (cond
, 0);
4048 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4049 op
= TREE_OPERAND (cond
, 1);
4050 if (TREE_CODE (op
) == SSA_NAME
)
4052 op
= get_iv (data
, op
)->base
;
4053 fd_ivopts_data
= data
;
4054 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4058 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4059 return cost
!= INFTY
;
4062 /* Determines cost of basing replacement of USE on CAND. Returns false
4063 if USE cannot be based on CAND. */
4066 determine_use_iv_cost (struct ivopts_data
*data
,
4067 struct iv_use
*use
, struct iv_cand
*cand
)
4071 case USE_NONLINEAR_EXPR
:
4072 return determine_use_iv_cost_generic (data
, use
, cand
);
4075 return determine_use_iv_cost_address (data
, use
, cand
);
4078 return determine_use_iv_cost_condition (data
, use
, cand
);
4085 /* Determines costs of basing the use of the iv on an iv candidate. */
4088 determine_use_iv_costs (struct ivopts_data
*data
)
4092 struct iv_cand
*cand
;
4093 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4095 alloc_use_cost_map (data
);
4097 for (i
= 0; i
< n_iv_uses (data
); i
++)
4099 use
= iv_use (data
, i
);
4101 if (data
->consider_all_candidates
)
4103 for (j
= 0; j
< n_iv_cands (data
); j
++)
4105 cand
= iv_cand (data
, j
);
4106 determine_use_iv_cost (data
, use
, cand
);
4113 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4115 cand
= iv_cand (data
, j
);
4116 if (!determine_use_iv_cost (data
, use
, cand
))
4117 bitmap_set_bit (to_clear
, j
);
4120 /* Remove the candidates for that the cost is infinite from
4121 the list of related candidates. */
4122 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4123 bitmap_clear (to_clear
);
4127 BITMAP_FREE (to_clear
);
4129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4131 fprintf (dump_file
, "Use-candidate costs:\n");
4133 for (i
= 0; i
< n_iv_uses (data
); i
++)
4135 use
= iv_use (data
, i
);
4137 fprintf (dump_file
, "Use %d:\n", i
);
4138 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4139 for (j
= 0; j
< use
->n_map_members
; j
++)
4141 if (!use
->cost_map
[j
].cand
4142 || use
->cost_map
[j
].cost
== INFTY
)
4145 fprintf (dump_file
, " %d\t%d\t",
4146 use
->cost_map
[j
].cand
->id
,
4147 use
->cost_map
[j
].cost
);
4148 if (use
->cost_map
[j
].depends_on
)
4149 bitmap_print (dump_file
,
4150 use
->cost_map
[j
].depends_on
, "","");
4151 fprintf (dump_file
, "\n");
4154 fprintf (dump_file
, "\n");
4156 fprintf (dump_file
, "\n");
4160 /* Determines cost of the candidate CAND. */
4163 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4165 unsigned cost_base
, cost_step
;
4174 /* There are two costs associated with the candidate -- its increment
4175 and its initialization. The second is almost negligible for any loop
4176 that rolls enough, so we take it just very little into account. */
4178 base
= cand
->iv
->base
;
4179 cost_base
= force_var_cost (data
, base
, NULL
);
4180 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4182 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4184 /* Prefer the original iv unless we may gain something by replacing it;
4185 this is not really relevant for artificial ivs created by other
4187 if (cand
->pos
== IP_ORIGINAL
4188 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4191 /* Prefer not to insert statements into latch unless there are some
4192 already (so that we do not create unnecessary jumps). */
4193 if (cand
->pos
== IP_END
4194 && empty_block_p (ip_end_pos (data
->current_loop
)))
4198 /* Determines costs of computation of the candidates. */
4201 determine_iv_costs (struct ivopts_data
*data
)
4205 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4207 fprintf (dump_file
, "Candidate costs:\n");
4208 fprintf (dump_file
, " cand\tcost\n");
4211 for (i
= 0; i
< n_iv_cands (data
); i
++)
4213 struct iv_cand
*cand
= iv_cand (data
, i
);
4215 determine_iv_cost (data
, cand
);
4217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4218 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4221 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4222 fprintf (dump_file
, "\n");
4225 /* Calculates cost for having SIZE induction variables. */
4228 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4230 return global_cost_for_size (size
,
4231 loop_data (data
->current_loop
)->regs_used
,
4235 /* For each size of the induction variable set determine the penalty. */
4238 determine_set_costs (struct ivopts_data
*data
)
4242 struct loop
*loop
= data
->current_loop
;
4245 /* We use the following model (definitely improvable, especially the
4246 cost function -- TODO):
4248 We estimate the number of registers available (using MD data), name it A.
4250 We estimate the number of registers used by the loop, name it U. This
4251 number is obtained as the number of loop phi nodes (not counting virtual
4252 registers and bivs) + the number of variables from outside of the loop.
4254 We set a reserve R (free regs that are used for temporary computations,
4255 etc.). For now the reserve is a constant 3.
4257 Let I be the number of induction variables.
4259 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4260 make a lot of ivs without a reason).
4261 -- if A - R < U + I <= A, the cost is I * PRES_COST
4262 -- if U + I > A, the cost is I * PRES_COST and
4263 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4265 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4267 fprintf (dump_file
, "Global costs:\n");
4268 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4269 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4270 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4271 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4275 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4277 op
= PHI_RESULT (phi
);
4279 if (!is_gimple_reg (op
))
4282 if (get_iv (data
, op
))
4288 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4290 struct version_info
*info
= ver_info (data
, j
);
4292 if (info
->inv_id
&& info
->has_nonlin_use
)
4296 loop_data (loop
)->regs_used
= n
;
4297 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4298 fprintf (dump_file
, " regs_used %d\n", n
);
4300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4302 fprintf (dump_file
, " cost for size:\n");
4303 fprintf (dump_file
, " ivs\tcost\n");
4304 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4305 fprintf (dump_file
, " %d\t%d\n", j
,
4306 ivopts_global_cost_for_size (data
, j
));
4307 fprintf (dump_file
, "\n");
4311 /* Returns true if A is a cheaper cost pair than B. */
4314 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4322 if (a
->cost
< b
->cost
)
4325 if (a
->cost
> b
->cost
)
4328 /* In case the costs are the same, prefer the cheaper candidate. */
4329 if (a
->cand
->cost
< b
->cand
->cost
)
4335 /* Computes the cost field of IVS structure. */
4338 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4342 cost
+= ivs
->cand_use_cost
;
4343 cost
+= ivs
->cand_cost
;
4344 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4349 /* Remove invariants in set INVS to set IVS. */
4352 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4360 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4362 ivs
->n_invariant_uses
[iid
]--;
4363 if (ivs
->n_invariant_uses
[iid
] == 0)
4368 /* Set USE not to be expressed by any candidate in IVS. */
4371 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4374 unsigned uid
= use
->id
, cid
;
4375 struct cost_pair
*cp
;
4377 cp
= ivs
->cand_for_use
[uid
];
4383 ivs
->cand_for_use
[uid
] = NULL
;
4384 ivs
->n_cand_uses
[cid
]--;
4386 if (ivs
->n_cand_uses
[cid
] == 0)
4388 bitmap_clear_bit (ivs
->cands
, cid
);
4389 /* Do not count the pseudocandidates. */
4393 ivs
->cand_cost
-= cp
->cand
->cost
;
4395 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4398 ivs
->cand_use_cost
-= cp
->cost
;
4400 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4401 iv_ca_recount_cost (data
, ivs
);
4404 /* Add invariants in set INVS to set IVS. */
4407 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4415 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4417 ivs
->n_invariant_uses
[iid
]++;
4418 if (ivs
->n_invariant_uses
[iid
] == 1)
4423 /* Set cost pair for USE in set IVS to CP. */
4426 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4427 struct iv_use
*use
, struct cost_pair
*cp
)
4429 unsigned uid
= use
->id
, cid
;
4431 if (ivs
->cand_for_use
[uid
] == cp
)
4434 if (ivs
->cand_for_use
[uid
])
4435 iv_ca_set_no_cp (data
, ivs
, use
);
4442 ivs
->cand_for_use
[uid
] = cp
;
4443 ivs
->n_cand_uses
[cid
]++;
4444 if (ivs
->n_cand_uses
[cid
] == 1)
4446 bitmap_set_bit (ivs
->cands
, cid
);
4447 /* Do not count the pseudocandidates. */
4451 ivs
->cand_cost
+= cp
->cand
->cost
;
4453 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4456 ivs
->cand_use_cost
+= cp
->cost
;
4457 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4458 iv_ca_recount_cost (data
, ivs
);
4462 /* Extend set IVS by expressing USE by some of the candidates in it
4466 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4469 struct cost_pair
*best_cp
= NULL
, *cp
;
4473 gcc_assert (ivs
->upto
>= use
->id
);
4475 if (ivs
->upto
== use
->id
)
4481 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4483 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4485 if (cheaper_cost_pair (cp
, best_cp
))
4489 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4492 /* Get cost for assignment IVS. */
4495 iv_ca_cost (struct iv_ca
*ivs
)
4497 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4500 /* Returns true if all dependences of CP are among invariants in IVS. */
4503 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4508 if (!cp
->depends_on
)
4511 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4513 if (ivs
->n_invariant_uses
[i
] == 0)
4520 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4521 it before NEXT_CHANGE. */
4523 static struct iv_ca_delta
*
4524 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4525 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4527 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4530 change
->old_cp
= old_cp
;
4531 change
->new_cp
= new_cp
;
4532 change
->next_change
= next_change
;
4537 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4540 static struct iv_ca_delta
*
4541 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4543 struct iv_ca_delta
*last
;
4551 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4553 last
->next_change
= l2
;
4558 /* Returns candidate by that USE is expressed in IVS. */
4560 static struct cost_pair
*
4561 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4563 return ivs
->cand_for_use
[use
->id
];
4566 /* Reverse the list of changes DELTA, forming the inverse to it. */
4568 static struct iv_ca_delta
*
4569 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4571 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4572 struct cost_pair
*tmp
;
4574 for (act
= delta
; act
; act
= next
)
4576 next
= act
->next_change
;
4577 act
->next_change
= prev
;
4581 act
->old_cp
= act
->new_cp
;
4588 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4589 reverted instead. */
4592 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4593 struct iv_ca_delta
*delta
, bool forward
)
4595 struct cost_pair
*from
, *to
;
4596 struct iv_ca_delta
*act
;
4599 delta
= iv_ca_delta_reverse (delta
);
4601 for (act
= delta
; act
; act
= act
->next_change
)
4605 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4606 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4610 iv_ca_delta_reverse (delta
);
4613 /* Returns true if CAND is used in IVS. */
4616 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4618 return ivs
->n_cand_uses
[cand
->id
] > 0;
4621 /* Returns number of induction variable candidates in the set IVS. */
4624 iv_ca_n_cands (struct iv_ca
*ivs
)
4626 return ivs
->n_cands
;
4629 /* Free the list of changes DELTA. */
4632 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4634 struct iv_ca_delta
*act
, *next
;
4636 for (act
= *delta
; act
; act
= next
)
4638 next
= act
->next_change
;
4645 /* Allocates new iv candidates assignment. */
4647 static struct iv_ca
*
4648 iv_ca_new (struct ivopts_data
*data
)
4650 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4654 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4655 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4656 nw
->cands
= BITMAP_ALLOC (NULL
);
4659 nw
->cand_use_cost
= 0;
4661 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4667 /* Free memory occupied by the set IVS. */
4670 iv_ca_free (struct iv_ca
**ivs
)
4672 free ((*ivs
)->cand_for_use
);
4673 free ((*ivs
)->n_cand_uses
);
4674 BITMAP_FREE ((*ivs
)->cands
);
4675 free ((*ivs
)->n_invariant_uses
);
4680 /* Dumps IVS to FILE. */
4683 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4685 const char *pref
= " invariants ";
4688 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4689 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4691 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4692 if (ivs
->n_invariant_uses
[i
])
4694 fprintf (file
, "%s%d", pref
, i
);
4697 fprintf (file
, "\n");
4700 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4701 new set, and store differences in DELTA. Number of induction variables
4702 in the new set is stored to N_IVS. */
4705 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4706 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4711 struct cost_pair
*old_cp
, *new_cp
;
4714 for (i
= 0; i
< ivs
->upto
; i
++)
4716 use
= iv_use (data
, i
);
4717 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4720 && old_cp
->cand
== cand
)
4723 new_cp
= get_use_iv_cost (data
, use
, cand
);
4727 if (!iv_ca_has_deps (ivs
, new_cp
))
4730 if (!cheaper_cost_pair (new_cp
, old_cp
))
4733 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4736 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4737 cost
= iv_ca_cost (ivs
);
4739 *n_ivs
= iv_ca_n_cands (ivs
);
4740 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4745 /* Try narrowing set IVS by removing CAND. Return the cost of
4746 the new set and store the differences in DELTA. */
4749 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4750 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4754 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4756 struct iv_cand
*cnd
;
4760 for (i
= 0; i
< n_iv_uses (data
); i
++)
4762 use
= iv_use (data
, i
);
4764 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4765 if (old_cp
->cand
!= cand
)
4770 if (data
->consider_all_candidates
)
4772 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4777 cnd
= iv_cand (data
, ci
);
4779 cp
= get_use_iv_cost (data
, use
, cnd
);
4782 if (!iv_ca_has_deps (ivs
, cp
))
4785 if (!cheaper_cost_pair (cp
, new_cp
))
4793 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4798 cnd
= iv_cand (data
, ci
);
4800 cp
= get_use_iv_cost (data
, use
, cnd
);
4803 if (!iv_ca_has_deps (ivs
, cp
))
4806 if (!cheaper_cost_pair (cp
, new_cp
))
4815 iv_ca_delta_free (delta
);
4819 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4822 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4823 cost
= iv_ca_cost (ivs
);
4824 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4829 /* Try optimizing the set of candidates IVS by removing candidates different
4830 from to EXCEPT_CAND from it. Return cost of the new set, and store
4831 differences in DELTA. */
4834 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4835 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4838 struct iv_ca_delta
*act_delta
, *best_delta
;
4839 unsigned i
, best_cost
, acost
;
4840 struct iv_cand
*cand
;
4843 best_cost
= iv_ca_cost (ivs
);
4845 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4847 cand
= iv_cand (data
, i
);
4849 if (cand
== except_cand
)
4852 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4854 if (acost
< best_cost
)
4857 iv_ca_delta_free (&best_delta
);
4858 best_delta
= act_delta
;
4861 iv_ca_delta_free (&act_delta
);
4870 /* Recurse to possibly remove other unnecessary ivs. */
4871 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4872 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4873 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4874 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4878 /* Tries to extend the sets IVS in the best possible way in order
4879 to express the USE. */
4882 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4885 unsigned best_cost
, act_cost
;
4888 struct iv_cand
*cand
;
4889 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4890 struct cost_pair
*cp
;
4892 iv_ca_add_use (data
, ivs
, use
);
4893 best_cost
= iv_ca_cost (ivs
);
4895 cp
= iv_ca_cand_for_use (ivs
, use
);
4898 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4899 iv_ca_set_no_cp (data
, ivs
, use
);
4902 /* First try important candidates. Only if it fails, try the specific ones.
4903 Rationale -- in loops with many variables the best choice often is to use
4904 just one generic biv. If we added here many ivs specific to the uses,
4905 the optimization algorithm later would be likely to get stuck in a local
4906 minimum, thus causing us to create too many ivs. The approach from
4907 few ivs to more seems more likely to be successful -- starting from few
4908 ivs, replacing an expensive use by a specific iv should always be a
4910 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4912 cand
= iv_cand (data
, i
);
4914 if (iv_ca_cand_used_p (ivs
, cand
))
4917 cp
= get_use_iv_cost (data
, use
, cand
);
4921 iv_ca_set_cp (data
, ivs
, use
, cp
);
4922 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4923 iv_ca_set_no_cp (data
, ivs
, use
);
4924 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4926 if (act_cost
< best_cost
)
4928 best_cost
= act_cost
;
4930 iv_ca_delta_free (&best_delta
);
4931 best_delta
= act_delta
;
4934 iv_ca_delta_free (&act_delta
);
4937 if (best_cost
== INFTY
)
4939 for (i
= 0; i
< use
->n_map_members
; i
++)
4941 cp
= use
->cost_map
+ i
;
4946 /* Already tried this. */
4947 if (cand
->important
)
4950 if (iv_ca_cand_used_p (ivs
, cand
))
4954 iv_ca_set_cp (data
, ivs
, use
, cp
);
4955 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4956 iv_ca_set_no_cp (data
, ivs
, use
);
4957 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4960 if (act_cost
< best_cost
)
4962 best_cost
= act_cost
;
4965 iv_ca_delta_free (&best_delta
);
4966 best_delta
= act_delta
;
4969 iv_ca_delta_free (&act_delta
);
4973 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4974 iv_ca_delta_free (&best_delta
);
4976 return (best_cost
!= INFTY
);
4979 /* Finds an initial assignment of candidates to uses. */
4981 static struct iv_ca
*
4982 get_initial_solution (struct ivopts_data
*data
)
4984 struct iv_ca
*ivs
= iv_ca_new (data
);
4987 for (i
= 0; i
< n_iv_uses (data
); i
++)
4988 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4997 /* Tries to improve set of induction variables IVS. */
5000 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5002 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5003 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5004 struct iv_cand
*cand
;
5006 /* Try extending the set of induction variables by one. */
5007 for (i
= 0; i
< n_iv_cands (data
); i
++)
5009 cand
= iv_cand (data
, i
);
5011 if (iv_ca_cand_used_p (ivs
, cand
))
5014 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5018 /* If we successfully added the candidate and the set is small enough,
5019 try optimizing it by removing other candidates. */
5020 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5022 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5023 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5024 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5025 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5028 if (acost
< best_cost
)
5031 iv_ca_delta_free (&best_delta
);
5032 best_delta
= act_delta
;
5035 iv_ca_delta_free (&act_delta
);
5040 /* Try removing the candidates from the set instead. */
5041 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5043 /* Nothing more we can do. */
5048 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5049 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5050 iv_ca_delta_free (&best_delta
);
5054 /* Attempts to find the optimal set of induction variables. We do simple
5055 greedy heuristic -- we try to replace at most one candidate in the selected
5056 solution and remove the unused ivs while this improves the cost. */
5058 static struct iv_ca
*
5059 find_optimal_iv_set (struct ivopts_data
*data
)
5065 /* Get the initial solution. */
5066 set
= get_initial_solution (data
);
5069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5070 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5074 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5076 fprintf (dump_file
, "Initial set of candidates:\n");
5077 iv_ca_dump (data
, dump_file
, set
);
5080 while (try_improve_iv_set (data
, set
))
5082 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5084 fprintf (dump_file
, "Improved to:\n");
5085 iv_ca_dump (data
, dump_file
, set
);
5089 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5090 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5092 for (i
= 0; i
< n_iv_uses (data
); i
++)
5094 use
= iv_use (data
, i
);
5095 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5101 /* Creates a new induction variable corresponding to CAND. */
5104 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5106 block_stmt_iterator incr_pos
;
5116 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5120 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5125 /* Mark that the iv is preserved. */
5126 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5127 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5129 /* Rewrite the increment so that it uses var_before directly. */
5130 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5135 gimple_add_tmp_var (cand
->var_before
);
5136 add_referenced_tmp_var (cand
->var_before
);
5138 base
= unshare_expr (cand
->iv
->base
);
5140 create_iv (base
, unshare_expr (cand
->iv
->step
),
5141 cand
->var_before
, data
->current_loop
,
5142 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5145 /* Creates new induction variables described in SET. */
5148 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5151 struct iv_cand
*cand
;
5154 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5156 cand
= iv_cand (data
, i
);
5157 create_new_iv (data
, cand
);
5161 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5162 is true, remove also the ssa name defined by the statement. */
5165 remove_statement (tree stmt
, bool including_defined_name
)
5167 if (TREE_CODE (stmt
) == PHI_NODE
)
5169 if (!including_defined_name
)
5171 /* Prevent the ssa name defined by the statement from being removed. */
5172 SET_PHI_RESULT (stmt
, NULL
);
5174 remove_phi_node (stmt
, NULL_TREE
);
5178 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5180 bsi_remove (&bsi
, true);
5184 /* Rewrites USE (definition of iv used in a nonlinear expression)
5185 using candidate CAND. */
5188 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5189 struct iv_use
*use
, struct iv_cand
*cand
)
5192 tree op
, stmts
, tgt
, ass
;
5193 block_stmt_iterator bsi
, pbsi
;
5195 /* An important special case -- if we are asked to express value of
5196 the original iv by itself, just exit; there is no need to
5197 introduce a new computation (that might also need casting the
5198 variable to unsigned and back). */
5199 if (cand
->pos
== IP_ORIGINAL
5200 && cand
->incremented_at
== use
->stmt
)
5202 tree step
, ctype
, utype
;
5203 enum tree_code incr_code
= PLUS_EXPR
;
5205 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5206 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5208 step
= cand
->iv
->step
;
5209 ctype
= TREE_TYPE (step
);
5210 utype
= TREE_TYPE (cand
->var_after
);
5211 if (TREE_CODE (step
) == NEGATE_EXPR
)
5213 incr_code
= MINUS_EXPR
;
5214 step
= TREE_OPERAND (step
, 0);
5217 /* Check whether we may leave the computation unchanged.
5218 This is the case only if it does not rely on other
5219 computations in the loop -- otherwise, the computation
5220 we rely upon may be removed in remove_unused_ivs,
5221 thus leading to ICE. */
5222 op
= TREE_OPERAND (use
->stmt
, 1);
5223 if (TREE_CODE (op
) == PLUS_EXPR
5224 || TREE_CODE (op
) == MINUS_EXPR
)
5226 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5227 op
= TREE_OPERAND (op
, 1);
5228 else if (TREE_CODE (op
) == PLUS_EXPR
5229 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5230 op
= TREE_OPERAND (op
, 0);
5238 && (TREE_CODE (op
) == INTEGER_CST
5239 || operand_equal_p (op
, step
, 0)))
5242 /* Otherwise, add the necessary computations to express
5244 op
= fold_convert (ctype
, cand
->var_before
);
5245 comp
= fold_convert (utype
,
5246 build2 (incr_code
, ctype
, op
,
5247 unshare_expr (step
)));
5250 comp
= get_computation (data
->current_loop
, use
, cand
);
5252 switch (TREE_CODE (use
->stmt
))
5255 tgt
= PHI_RESULT (use
->stmt
);
5257 /* If we should keep the biv, do not replace it. */
5258 if (name_info (data
, tgt
)->preserve_biv
)
5261 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5262 while (!bsi_end_p (pbsi
)
5263 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5271 tgt
= TREE_OPERAND (use
->stmt
, 0);
5272 bsi
= bsi_for_stmt (use
->stmt
);
5279 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5281 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5284 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5285 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5286 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5287 remove_statement (use
->stmt
, false);
5288 SSA_NAME_DEF_STMT (tgt
) = ass
;
5293 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5294 TREE_OPERAND (use
->stmt
, 1) = op
;
5298 /* Replaces ssa name in index IDX by its basic variable. Callback for
5302 idx_remove_ssa_names (tree base
, tree
*idx
,
5303 void *data ATTRIBUTE_UNUSED
)
5307 if (TREE_CODE (*idx
) == SSA_NAME
)
5308 *idx
= SSA_NAME_VAR (*idx
);
5310 if (TREE_CODE (base
) == ARRAY_REF
)
5312 op
= &TREE_OPERAND (base
, 2);
5314 && TREE_CODE (*op
) == SSA_NAME
)
5315 *op
= SSA_NAME_VAR (*op
);
5316 op
= &TREE_OPERAND (base
, 3);
5318 && TREE_CODE (*op
) == SSA_NAME
)
5319 *op
= SSA_NAME_VAR (*op
);
5325 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5328 unshare_and_remove_ssa_names (tree ref
)
5330 ref
= unshare_expr (ref
);
5331 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5336 /* Extract the alias analysis info for the memory reference REF. There are
5337 several ways how this information may be stored and what precisely is
5338 its semantics depending on the type of the reference, but there always is
5339 somewhere hidden one _DECL node that is used to determine the set of
5340 virtual operands for the reference. The code below deciphers this jungle
5341 and extracts this single useful piece of information. */
5344 get_ref_tag (tree ref
, tree orig
)
5346 tree var
= get_base_address (ref
);
5347 tree aref
= NULL_TREE
, tag
, sv
;
5348 HOST_WIDE_INT offset
, size
, maxsize
;
5350 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5352 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5357 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5358 return unshare_expr (sv
);
5363 if (TREE_CODE (var
) == INDIRECT_REF
)
5365 /* In case the base is a dereference of a pointer, first check its name
5366 mem tag, and if it does not have one, use type mem tag. */
5367 var
= TREE_OPERAND (var
, 0);
5368 if (TREE_CODE (var
) != SSA_NAME
)
5371 if (SSA_NAME_PTR_INFO (var
))
5373 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5378 var
= SSA_NAME_VAR (var
);
5379 tag
= var_ann (var
)->type_mem_tag
;
5380 gcc_assert (tag
!= NULL_TREE
);
5388 tag
= var_ann (var
)->type_mem_tag
;
5396 /* Copies the reference information from OLD_REF to NEW_REF. */
5399 copy_ref_info (tree new_ref
, tree old_ref
)
5401 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5402 copy_mem_ref_info (new_ref
, old_ref
);
5405 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5406 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5410 /* Rewrites USE (address that is an iv) using candidate CAND. */
5413 rewrite_use_address (struct ivopts_data
*data
,
5414 struct iv_use
*use
, struct iv_cand
*cand
)
5416 struct affine_tree_combination aff
;
5417 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5420 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5421 unshare_aff_combination (&aff
);
5423 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5424 copy_ref_info (ref
, *use
->op_p
);
5428 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5432 rewrite_use_compare (struct ivopts_data
*data
,
5433 struct iv_use
*use
, struct iv_cand
*cand
)
5436 tree
*op_p
, cond
, op
, stmts
, bound
;
5437 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5438 enum tree_code compare
;
5439 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5444 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5445 tree var_type
= TREE_TYPE (var
);
5447 compare
= iv_elimination_compare (data
, use
);
5448 bound
= fold_convert (var_type
, bound
);
5449 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5453 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5455 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5456 update_stmt (use
->stmt
);
5460 /* The induction variable elimination failed; just express the original
5462 comp
= get_computation (data
->current_loop
, use
, cand
);
5465 op_p
= &TREE_OPERAND (cond
, 0);
5466 if (TREE_CODE (*op_p
) != SSA_NAME
5467 || zero_p (get_iv (data
, *op_p
)->step
))
5468 op_p
= &TREE_OPERAND (cond
, 1);
5470 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5472 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5477 /* Ensure that operand *OP_P may be used at the end of EXIT without
5478 violating loop closed ssa form. */
5481 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
5484 struct loop
*def_loop
;
5487 use
= USE_FROM_PTR (op_p
);
5488 if (TREE_CODE (use
) != SSA_NAME
)
5491 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
5495 def_loop
= def_bb
->loop_father
;
5496 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
5499 /* Try finding a phi node that copies the value out of the loop. */
5500 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5501 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
5506 /* Create such a phi node. */
5507 tree new_name
= duplicate_ssa_name (use
, NULL
);
5509 phi
= create_phi_node (new_name
, exit
->dest
);
5510 SSA_NAME_DEF_STMT (new_name
) = phi
;
5511 add_phi_arg (phi
, use
, exit
);
5514 SET_USE (op_p
, PHI_RESULT (phi
));
5517 /* Ensure that operands of STMT may be used at the end of EXIT without
5518 violating loop closed ssa form. */
5521 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
5524 use_operand_p use_p
;
5526 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
5527 protect_loop_closed_ssa_form_use (exit
, use_p
);
5530 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5531 so that they are emitted on the correct place, and so that the loop closed
5532 ssa form is preserved. */
5535 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
5537 tree_stmt_iterator tsi
;
5538 block_stmt_iterator bsi
;
5539 tree phi
, stmt
, def
, next
;
5541 if (!single_pred_p (exit
->dest
))
5542 split_loop_exit_edge (exit
);
5544 /* Ensure there is label in exit->dest, so that we can
5546 tree_block_label (exit
->dest
);
5547 bsi
= bsi_after_labels (exit
->dest
);
5549 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
5551 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
5553 tree stmt
= tsi_stmt (tsi
);
5554 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5555 protect_loop_closed_ssa_form (exit
, stmt
);
5560 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5561 protect_loop_closed_ssa_form (exit
, stmts
);
5567 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
5569 next
= PHI_CHAIN (phi
);
5571 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
5573 def
= PHI_RESULT (phi
);
5574 remove_statement (phi
, false);
5575 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
5577 SSA_NAME_DEF_STMT (def
) = stmt
;
5578 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5583 /* Rewrites USE using candidate CAND. */
5586 rewrite_use (struct ivopts_data
*data
,
5587 struct iv_use
*use
, struct iv_cand
*cand
)
5591 case USE_NONLINEAR_EXPR
:
5592 rewrite_use_nonlinear_expr (data
, use
, cand
);
5596 rewrite_use_address (data
, use
, cand
);
5600 rewrite_use_compare (data
, use
, cand
);
5606 mark_new_vars_to_rename (use
->stmt
);
5609 /* Rewrite the uses using the selected induction variables. */
5612 rewrite_uses (struct ivopts_data
*data
)
5615 struct iv_cand
*cand
;
5618 for (i
= 0; i
< n_iv_uses (data
); i
++)
5620 use
= iv_use (data
, i
);
5621 cand
= use
->selected
;
5624 rewrite_use (data
, use
, cand
);
5628 /* Removes the ivs that are not used after rewriting. */
5631 remove_unused_ivs (struct ivopts_data
*data
)
5636 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5638 struct version_info
*info
;
5640 info
= ver_info (data
, j
);
5642 && !zero_p (info
->iv
->step
)
5644 && !info
->iv
->have_use_for
5645 && !info
->preserve_biv
)
5646 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5650 /* Frees data allocated by the optimization of a single loop. */
5653 free_loop_data (struct ivopts_data
*data
)
5659 htab_empty (data
->niters
);
5661 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5663 struct version_info
*info
;
5665 info
= ver_info (data
, i
);
5669 info
->has_nonlin_use
= false;
5670 info
->preserve_biv
= false;
5673 bitmap_clear (data
->relevant
);
5674 bitmap_clear (data
->important_candidates
);
5676 for (i
= 0; i
< n_iv_uses (data
); i
++)
5678 struct iv_use
*use
= iv_use (data
, i
);
5681 BITMAP_FREE (use
->related_cands
);
5682 for (j
= 0; j
< use
->n_map_members
; j
++)
5683 if (use
->cost_map
[j
].depends_on
)
5684 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5685 free (use
->cost_map
);
5688 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5690 for (i
= 0; i
< n_iv_cands (data
); i
++)
5692 struct iv_cand
*cand
= iv_cand (data
, i
);
5696 if (cand
->depends_on
)
5697 BITMAP_FREE (cand
->depends_on
);
5700 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5702 if (data
->version_info_size
< num_ssa_names
)
5704 data
->version_info_size
= 2 * num_ssa_names
;
5705 free (data
->version_info
);
5706 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5709 data
->max_inv_id
= 0;
5711 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5712 SET_DECL_RTL (obj
, NULL_RTX
);
5714 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5717 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5721 tree_ssa_iv_optimize_finalize (struct loops
*loops
, struct ivopts_data
*data
)
5725 for (i
= 1; i
< loops
->num
; i
++)
5726 if (loops
->parray
[i
])
5728 free (loops
->parray
[i
]->aux
);
5729 loops
->parray
[i
]->aux
= NULL
;
5732 free_loop_data (data
);
5733 free (data
->version_info
);
5734 BITMAP_FREE (data
->relevant
);
5735 BITMAP_FREE (data
->important_candidates
);
5736 htab_delete (data
->niters
);
5738 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5739 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5740 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5743 /* Optimizes the LOOP. Returns true if anything changed. */
5746 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5748 bool changed
= false;
5749 struct iv_ca
*iv_ca
;
5752 data
->current_loop
= loop
;
5754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5756 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5758 exit
= single_dom_exit (loop
);
5761 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5762 exit
->src
->index
, exit
->dest
->index
);
5763 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5764 fprintf (dump_file
, "\n");
5767 fprintf (dump_file
, "\n");
5770 /* For each ssa name determines whether it behaves as an induction variable
5772 if (!find_induction_variables (data
))
5775 /* Finds interesting uses (item 1). */
5776 find_interesting_uses (data
);
5777 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5780 /* Finds candidates for the induction variables (item 2). */
5781 find_iv_candidates (data
);
5783 /* Calculates the costs (item 3, part 1). */
5784 determine_use_iv_costs (data
);
5785 determine_iv_costs (data
);
5786 determine_set_costs (data
);
5788 /* Find the optimal set of induction variables (item 3, part 2). */
5789 iv_ca
= find_optimal_iv_set (data
);
5794 /* Create the new induction variables (item 4, part 1). */
5795 create_new_ivs (data
, iv_ca
);
5796 iv_ca_free (&iv_ca
);
5798 /* Rewrite the uses (item 4, part 2). */
5799 rewrite_uses (data
);
5801 /* Remove the ivs that are unused after rewriting. */
5802 remove_unused_ivs (data
);
5804 /* We have changed the structure of induction variables; it might happen
5805 that definitions in the scev database refer to some of them that were
5810 free_loop_data (data
);
5815 /* Main entry point. Optimizes induction variables in LOOPS. */
5818 tree_ssa_iv_optimize (struct loops
*loops
)
5821 struct ivopts_data data
;
5823 tree_ssa_iv_optimize_init (loops
, &data
);
5825 /* Optimize the loops starting with the innermost ones. */
5826 loop
= loops
->tree_root
;
5830 /* Scan the loops, inner ones first. */
5831 while (loop
!= loops
->tree_root
)
5833 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5834 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5836 tree_ssa_iv_optimize_loop (&data
, loop
);
5848 tree_ssa_iv_optimize_finalize (loops
, &data
);