/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
72 #include "tree-pass.h"
76 #include "insn-config.h"
80 #include "gimple-pretty-print.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
98 #include "tree-scalar-evolution.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
110 /* The infinite cost. */
111 #define INFTY 10000000
113 #define AVG_LOOP_NITER(LOOP) 5
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop
*loop
)
122 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
124 return AVG_LOOP_NITER (loop
);
129 /* Representation of the induction variable. */
132 tree base
; /* Initial value of the iv. */
133 tree base_object
; /* A memory object to that the induction variable points. */
134 tree step
; /* Step of the iv (constant only). */
135 tree ssa_name
; /* The ssa name with the value. */
136 unsigned use_id
; /* The identifier in the use if it is the case. */
137 bool biv_p
; /* Is it a biv? */
138 bool have_use_for
; /* Do we already have a use for it? */
139 bool no_overflow
; /* True if the iv doesn't overflow. */
140 bool have_address_use
;/* For biv, indicate if it's used in any address
144 /* Per-ssa version information (induction variable descriptions, etc.). */
147 tree name
; /* The ssa name. */
148 struct iv
*iv
; /* Induction variable description. */
149 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv
; /* For the original biv, whether to preserve it. */
152 unsigned inv_id
; /* Id of an invariant. */
158 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
159 USE_ADDRESS
, /* Use in an address. */
160 USE_COMPARE
/* Use is a compare. */
163 /* Cost of a computation. */
166 int cost
; /* The runtime cost. */
167 unsigned complexity
; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
173 static const comp_cost no_cost
= {0, 0};
174 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
176 /* The candidate - cost pair. */
179 struct iv_cand
*cand
; /* The candidate. */
180 comp_cost cost
; /* The cost. */
181 bitmap depends_on
; /* The list of invariants that have to be
183 tree value
; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp
; /* For iv elimination, the comparison. */
187 int inv_expr_id
; /* Loop invariant expression id. */
193 unsigned id
; /* The id of the use. */
194 unsigned sub_id
; /* The id of the sub use. */
195 enum use_type type
; /* Type of the use. */
196 struct iv
*iv
; /* The induction variable it is based on. */
197 gimple
*stmt
; /* Statement in that it occurs. */
198 tree
*op_p
; /* The place where it occurs. */
199 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
202 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
203 struct cost_pair
*cost_map
;
204 /* The costs wrto the iv candidates. */
206 struct iv_cand
*selected
;
207 /* The selected candidate. */
209 struct iv_use
*next
; /* The next sub use. */
210 tree addr_base
; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset
;
212 /* Const offset stripped from base address. */
215 /* The position where the iv is computed. */
218 IP_NORMAL
, /* At the end, just before the exit condition. */
219 IP_END
, /* At the end of the latch block. */
220 IP_BEFORE_USE
, /* Immediately before a specific use. */
221 IP_AFTER_USE
, /* Immediately after a specific use. */
222 IP_ORIGINAL
/* The original biv. */
225 /* The induction variable candidate. */
228 unsigned id
; /* The number of the candidate. */
229 bool important
; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
232 gimple
*incremented_at
;/* For original biv, the statement where it is
234 tree var_before
; /* The variable used for it before increment. */
235 tree var_after
; /* The variable used for it after increment. */
236 struct iv
*iv
; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost
; /* Cost of the candidate. */
241 unsigned cost_step
; /* Cost of the candidate's increment operation. */
242 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on
; /* The list of invariants that are used in step of the
246 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
250 /* Loop invariant expression hashtable entry. */
251 struct iv_inv_expr_ent
258 /* The data used by the induction variable optimizations. */
260 /* Hashtable helpers. */
262 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
264 static inline hashval_t
hash (const iv_inv_expr_ent
*);
265 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
268 /* Hash function for loop invariant expressions. */
271 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
276 /* Hash table equality function for expressions. */
279 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
280 const iv_inv_expr_ent
*expr2
)
282 return expr1
->hash
== expr2
->hash
283 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
288 /* The currently optimized loop. */
289 struct loop
*current_loop
;
290 source_location loop_loc
;
292 /* Numbers of iterations for all exits of the current loop. */
293 hash_map
<edge
, tree_niter_desc
*> *niters
;
295 /* Number of registers used in it. */
298 /* The size of version_info array allocated. */
299 unsigned version_info_size
;
301 /* The array of information for the ssa names. */
302 struct version_info
*version_info
;
304 /* The hashtable of loop invariant expressions created
306 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
308 /* Loop invariant expression id. */
311 /* The bitmap of indices in version_info whose value was changed. */
314 /* The uses of induction variables. */
315 vec
<iv_use
*> iv_uses
;
317 /* The candidates. */
318 vec
<iv_cand
*> iv_candidates
;
320 /* A bitmap of important candidates. */
321 bitmap important_candidates
;
323 /* Cache used by tree_to_aff_combination_expand. */
324 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
326 /* The maximum invariant id. */
329 /* Number of no_overflow BIVs which are not used in memory address. */
330 unsigned bivs_not_used_in_addr
;
332 /* Obstack for iv structure. */
333 struct obstack iv_obstack
;
335 /* Whether to consider just related and important candidates when replacing a
337 bool consider_all_candidates
;
339 /* Are we optimizing for speed? */
342 /* Whether the loop body includes any function calls. */
343 bool body_includes_call
;
345 /* Whether the loop body can only be exited via single exit. */
346 bool loop_single_exit_p
;
349 /* An assignment of iv candidates to uses. */
353 /* The number of uses covered by the assignment. */
356 /* Number of uses that cannot be expressed by the candidates in the set. */
359 /* Candidate assigned to a use, together with the related costs. */
360 struct cost_pair
**cand_for_use
;
362 /* Number of times each candidate is used. */
363 unsigned *n_cand_uses
;
365 /* The candidates used. */
368 /* The number of candidates in the set. */
371 /* Total number of registers needed. */
374 /* Total cost of expressing uses. */
375 comp_cost cand_use_cost
;
377 /* Total cost of candidates. */
380 /* Number of times each invariant is used. */
381 unsigned *n_invariant_uses
;
383 /* The array holding the number of uses of each loop
384 invariant expressions created by ivopt. */
385 unsigned *used_inv_expr
;
387 /* The number of created loop invariants. */
388 unsigned num_used_inv_expr
;
390 /* Total cost of the assignment. */
394 /* Difference of two iv candidate assignments. */
401 /* An old assignment (for rollback purposes). */
402 struct cost_pair
*old_cp
;
404 /* A new assignment. */
405 struct cost_pair
*new_cp
;
407 /* Next change in the list. */
408 struct iv_ca_delta
*next_change
;
411 /* Bound on number of candidates below that all candidates are considered. */
413 #define CONSIDER_ALL_CANDIDATES_BOUND \
414 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
416 /* If there are more iv occurrences, we just give up (it is quite unlikely that
417 optimizing such a loop would help, and it would take ages). */
419 #define MAX_CONSIDERED_USES \
420 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
422 /* If there are at most this number of ivs in the set, try removing unnecessary
423 ivs from the set always. */
425 #define ALWAYS_PRUNE_CAND_SET_BOUND \
426 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
428 /* The list of trees for that the decl_rtl field must be reset is stored
431 static vec
<tree
> decl_rtl_to_reset
;
433 static comp_cost
force_expr_to_var_cost (tree
, bool);
435 /* Number of uses recorded in DATA. */
437 static inline unsigned
438 n_iv_uses (struct ivopts_data
*data
)
440 return data
->iv_uses
.length ();
443 /* Ith use recorded in DATA. */
445 static inline struct iv_use
*
446 iv_use (struct ivopts_data
*data
, unsigned i
)
448 return data
->iv_uses
[i
];
451 /* Number of candidates recorded in DATA. */
453 static inline unsigned
454 n_iv_cands (struct ivopts_data
*data
)
456 return data
->iv_candidates
.length ();
459 /* Ith candidate recorded in DATA. */
461 static inline struct iv_cand
*
462 iv_cand (struct ivopts_data
*data
, unsigned i
)
464 return data
->iv_candidates
[i
];
467 /* The single loop exit if it dominates the latch, NULL otherwise. */
470 single_dom_exit (struct loop
*loop
)
472 edge exit
= single_exit (loop
);
477 if (!just_once_each_iteration_p (loop
, exit
->src
))
483 /* Dumps information about the induction variable IV to FILE. */
486 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
488 if (iv
->ssa_name
&& dump_name
)
490 fprintf (file
, "ssa name ");
491 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
492 fprintf (file
, "\n");
495 fprintf (file
, " type ");
496 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
497 fprintf (file
, "\n");
501 fprintf (file
, " base ");
502 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
503 fprintf (file
, "\n");
505 fprintf (file
, " step ");
506 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
507 fprintf (file
, "\n");
511 fprintf (file
, " invariant ");
512 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
513 fprintf (file
, "\n");
518 fprintf (file
, " base object ");
519 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
520 fprintf (file
, "\n");
524 fprintf (file
, " is a biv\n");
527 fprintf (file
, " iv doesn't overflow wrto loop niter\n");
530 /* Dumps information about the USE to FILE. */
533 dump_use (FILE *file
, struct iv_use
*use
)
535 fprintf (file
, "use %d", use
->id
);
537 fprintf (file
, ".%d", use
->sub_id
);
539 fprintf (file
, "\n");
543 case USE_NONLINEAR_EXPR
:
544 fprintf (file
, " generic\n");
548 fprintf (file
, " address\n");
552 fprintf (file
, " compare\n");
559 fprintf (file
, " in statement ");
560 print_gimple_stmt (file
, use
->stmt
, 0, 0);
561 fprintf (file
, "\n");
563 fprintf (file
, " at position ");
565 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
566 fprintf (file
, "\n");
568 dump_iv (file
, use
->iv
, false);
570 if (use
->related_cands
)
572 fprintf (file
, " related candidates ");
573 dump_bitmap (file
, use
->related_cands
);
577 /* Dumps information about the uses to FILE. */
580 dump_uses (FILE *file
, struct ivopts_data
*data
)
585 for (i
= 0; i
< n_iv_uses (data
); i
++)
587 use
= iv_use (data
, i
);
590 dump_use (file
, use
);
594 fprintf (file
, "\n");
598 /* Dumps information about induction variable candidate CAND to FILE. */
601 dump_cand (FILE *file
, struct iv_cand
*cand
)
603 struct iv
*iv
= cand
->iv
;
605 fprintf (file
, "candidate %d%s\n",
606 cand
->id
, cand
->important
? " (important)" : "");
608 if (cand
->depends_on
)
610 fprintf (file
, " depends on ");
611 dump_bitmap (file
, cand
->depends_on
);
616 fprintf (file
, " final value replacement\n");
620 if (cand
->var_before
)
622 fprintf (file
, " var_before ");
623 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
624 fprintf (file
, "\n");
628 fprintf (file
, " var_after ");
629 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
630 fprintf (file
, "\n");
636 fprintf (file
, " incremented before exit test\n");
640 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
644 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
648 fprintf (file
, " incremented at end\n");
652 fprintf (file
, " original biv\n");
656 dump_iv (file
, iv
, false);
659 /* Returns the info for ssa version VER. */
661 static inline struct version_info
*
662 ver_info (struct ivopts_data
*data
, unsigned ver
)
664 return data
->version_info
+ ver
;
667 /* Returns the info for ssa name NAME. */
669 static inline struct version_info
*
670 name_info (struct ivopts_data
*data
, tree name
)
672 return ver_info (data
, SSA_NAME_VERSION (name
));
675 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
679 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
681 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
685 if (sbb
== loop
->latch
)
691 return stmt
== last_stmt (bb
);
694 /* Returns true if STMT if after the place where the original induction
695 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
696 if the positions are identical. */
699 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
701 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
702 basic_block stmt_bb
= gimple_bb (stmt
);
704 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
707 if (stmt_bb
!= cand_bb
)
711 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
713 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
716 /* Returns true if STMT if after the place where the induction variable
717 CAND is incremented in LOOP. */
720 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
728 return stmt_after_ip_normal_pos (loop
, stmt
);
732 return stmt_after_inc_pos (cand
, stmt
, false);
735 return stmt_after_inc_pos (cand
, stmt
, true);
742 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
745 abnormal_ssa_name_p (tree exp
)
750 if (TREE_CODE (exp
) != SSA_NAME
)
753 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
756 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
757 abnormal phi node. Callback for for_each_index. */
760 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
761 void *data ATTRIBUTE_UNUSED
)
763 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
765 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
767 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
771 return !abnormal_ssa_name_p (*index
);
774 /* Returns true if EXPR contains a ssa name that occurs in an
775 abnormal phi node. */
778 contains_abnormal_ssa_name_p (tree expr
)
781 enum tree_code_class codeclass
;
786 code
= TREE_CODE (expr
);
787 codeclass
= TREE_CODE_CLASS (code
);
789 if (code
== SSA_NAME
)
790 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
792 if (code
== INTEGER_CST
793 || is_gimple_min_invariant (expr
))
796 if (code
== ADDR_EXPR
)
797 return !for_each_index (&TREE_OPERAND (expr
, 0),
798 idx_contains_abnormal_ssa_name_p
,
801 if (code
== COND_EXPR
)
802 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
803 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
804 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
810 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
815 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
827 /* Returns the structure describing number of iterations determined from
828 EXIT of DATA->current_loop, or NULL if something goes wrong. */
830 static struct tree_niter_desc
*
831 niter_for_exit (struct ivopts_data
*data
, edge exit
)
833 struct tree_niter_desc
*desc
;
834 tree_niter_desc
**slot
;
838 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
842 slot
= data
->niters
->get (exit
);
846 /* Try to determine number of iterations. We cannot safely work with ssa
847 names that appear in phi nodes on abnormal edges, so that we do not
848 create overlapping life ranges for them (PR 27283). */
849 desc
= XNEW (struct tree_niter_desc
);
850 if (!number_of_iterations_exit (data
->current_loop
,
852 || contains_abnormal_ssa_name_p (desc
->niter
))
857 data
->niters
->put (exit
, desc
);
865 /* Returns the structure describing number of iterations determined from
866 single dominating exit of DATA->current_loop, or NULL if something
869 static struct tree_niter_desc
*
870 niter_for_single_dom_exit (struct ivopts_data
*data
)
872 edge exit
= single_dom_exit (data
->current_loop
);
877 return niter_for_exit (data
, exit
);
880 /* Initializes data structures used by the iv optimization pass, stored
884 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
886 data
->version_info_size
= 2 * num_ssa_names
;
887 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
888 data
->relevant
= BITMAP_ALLOC (NULL
);
889 data
->important_candidates
= BITMAP_ALLOC (NULL
);
890 data
->max_inv_id
= 0;
892 data
->iv_uses
.create (20);
893 data
->iv_candidates
.create (20);
894 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
895 data
->inv_expr_id
= 0;
896 data
->name_expansion_cache
= NULL
;
897 decl_rtl_to_reset
.create (20);
898 gcc_obstack_init (&data
->iv_obstack
);
901 /* Returns a memory object to that EXPR points. In case we are able to
902 determine that it does not point to any such object, NULL is returned. */
905 determine_base_object (tree expr
)
907 enum tree_code code
= TREE_CODE (expr
);
910 /* If this is a pointer casted to any type, we need to determine
911 the base object for the pointer; so handle conversions before
912 throwing away non-pointer expressions. */
913 if (CONVERT_EXPR_P (expr
))
914 return determine_base_object (TREE_OPERAND (expr
, 0));
916 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
925 obj
= TREE_OPERAND (expr
, 0);
926 base
= get_base_address (obj
);
931 if (TREE_CODE (base
) == MEM_REF
)
932 return determine_base_object (TREE_OPERAND (base
, 0));
934 return fold_convert (ptr_type_node
,
935 build_fold_addr_expr (base
));
937 case POINTER_PLUS_EXPR
:
938 return determine_base_object (TREE_OPERAND (expr
, 0));
942 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
946 return fold_convert (ptr_type_node
, expr
);
950 /* Return true if address expression with non-DECL_P operand appears
954 contain_complex_addr_expr (tree expr
)
959 switch (TREE_CODE (expr
))
961 case POINTER_PLUS_EXPR
:
964 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
965 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
969 return (!DECL_P (TREE_OPERAND (expr
, 0)));
978 /* Allocates an induction variable with given initial value BASE and step STEP
979 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
982 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
983 bool no_overflow
= false)
986 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
988 gcc_assert (step
!= NULL_TREE
);
990 /* Lower address expression in base except ones with DECL_P as operand.
992 1) More accurate cost can be computed for address expressions;
993 2) Duplicate candidates won't be created for bases in different
994 forms, like &a[0] and &a. */
996 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
997 || contain_complex_addr_expr (expr
))
1000 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1001 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1005 iv
->base_object
= determine_base_object (base
);
1008 iv
->have_use_for
= false;
1010 iv
->ssa_name
= NULL_TREE
;
1011 iv
->no_overflow
= no_overflow
;
1012 iv
->have_address_use
= false;
1017 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1018 doesn't overflow. */
1021 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1024 struct version_info
*info
= name_info (data
, iv
);
1026 gcc_assert (!info
->iv
);
1028 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1029 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1030 info
->iv
->ssa_name
= iv
;
1033 /* Finds induction variable declaration for VAR. */
1036 get_iv (struct ivopts_data
*data
, tree var
)
1039 tree type
= TREE_TYPE (var
);
1041 if (!POINTER_TYPE_P (type
)
1042 && !INTEGRAL_TYPE_P (type
))
1045 if (!name_info (data
, var
)->iv
)
1047 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1050 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1051 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1054 return name_info (data
, var
)->iv
;
1057 /* Return the first non-invariant ssa var found in EXPR. */
1060 extract_single_var_from_expr (tree expr
)
1064 enum tree_code code
;
1066 if (!expr
|| is_gimple_min_invariant (expr
))
1069 code
= TREE_CODE (expr
);
1070 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1072 n
= TREE_OPERAND_LENGTH (expr
);
1073 for (i
= 0; i
< n
; i
++)
1075 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1081 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1084 /* Finds basic ivs. */
1087 find_bivs (struct ivopts_data
*data
)
1091 tree step
, type
, base
, stop
;
1093 struct loop
*loop
= data
->current_loop
;
1096 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1100 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1103 if (virtual_operand_p (PHI_RESULT (phi
)))
1106 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1109 if (integer_zerop (iv
.step
))
1113 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1114 /* Stop expanding iv base at the first ssa var referred by iv step.
1115 Ideally we should stop at any ssa var, because that's expensive
1116 and unusual to happen, we just do it on the first one.
1118 See PR64705 for the rationale. */
1119 stop
= extract_single_var_from_expr (step
);
1120 base
= expand_simple_operations (base
, stop
);
1121 if (contains_abnormal_ssa_name_p (base
)
1122 || contains_abnormal_ssa_name_p (step
))
1125 type
= TREE_TYPE (PHI_RESULT (phi
));
1126 base
= fold_convert (type
, base
);
1129 if (POINTER_TYPE_P (type
))
1130 step
= convert_to_ptrofftype (step
);
1132 step
= fold_convert (type
, step
);
1135 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1142 /* Marks basic ivs. */
1145 mark_bivs (struct ivopts_data
*data
)
1150 struct iv
*iv
, *incr_iv
;
1151 struct loop
*loop
= data
->current_loop
;
1152 basic_block incr_bb
;
1155 data
->bivs_not_used_in_addr
= 0;
1156 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1160 iv
= get_iv (data
, PHI_RESULT (phi
));
1164 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1165 def
= SSA_NAME_DEF_STMT (var
);
1166 /* Don't mark iv peeled from other one as biv. */
1168 && gimple_code (def
) == GIMPLE_PHI
1169 && gimple_bb (def
) == loop
->header
)
1172 incr_iv
= get_iv (data
, var
);
1176 /* If the increment is in the subloop, ignore it. */
1177 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1178 if (incr_bb
->loop_father
!= data
->current_loop
1179 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1183 incr_iv
->biv_p
= true;
1184 if (iv
->no_overflow
)
1185 data
->bivs_not_used_in_addr
++;
1186 if (incr_iv
->no_overflow
)
1187 data
->bivs_not_used_in_addr
++;
1191 /* Checks whether STMT defines a linear induction variable and stores its
1192 parameters to IV. */
1195 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1198 struct loop
*loop
= data
->current_loop
;
1200 iv
->base
= NULL_TREE
;
1201 iv
->step
= NULL_TREE
;
1203 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1206 lhs
= gimple_assign_lhs (stmt
);
1207 if (TREE_CODE (lhs
) != SSA_NAME
)
1210 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1213 /* Stop expanding iv base at the first ssa var referred by iv step.
1214 Ideally we should stop at any ssa var, because that's expensive
1215 and unusual to happen, we just do it on the first one.
1217 See PR64705 for the rationale. */
1218 stop
= extract_single_var_from_expr (iv
->step
);
1219 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1220 if (contains_abnormal_ssa_name_p (iv
->base
)
1221 || contains_abnormal_ssa_name_p (iv
->step
))
1224 /* If STMT could throw, then do not consider STMT as defining a GIV.
1225 While this will suppress optimizations, we can not safely delete this
1226 GIV and associated statements, even if it appears it is not used. */
1227 if (stmt_could_throw_p (stmt
))
1233 /* Finds general ivs in statement STMT. */
1236 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1240 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1243 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1246 /* Finds general ivs in basic block BB. */
1249 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1251 gimple_stmt_iterator bsi
;
1253 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1254 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1257 /* Finds general ivs. */
1260 find_givs (struct ivopts_data
*data
)
1262 struct loop
*loop
= data
->current_loop
;
1263 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1266 for (i
= 0; i
< loop
->num_nodes
; i
++)
1267 find_givs_in_bb (data
, body
[i
]);
1271 /* For each ssa name defined in LOOP determines whether it is an induction
1272 variable and if so, its initial value and step. */
1275 find_induction_variables (struct ivopts_data
*data
)
1280 if (!find_bivs (data
))
1286 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1288 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1292 fprintf (dump_file
, " number of iterations ");
1293 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1294 if (!integer_zerop (niter
->may_be_zero
))
1296 fprintf (dump_file
, "; zero if ");
1297 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1299 fprintf (dump_file
, "\n\n");
1302 fprintf (dump_file
, "Induction variables:\n\n");
1304 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1306 if (ver_info (data
, i
)->iv
)
1307 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1314 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1315 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1316 is the const offset stripped from IV base. For uses of other types,
1317 ADDR_BASE and ADDR_OFFSET are zero by default. */
1319 static struct iv_use
*
1320 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1321 gimple
*stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1322 unsigned HOST_WIDE_INT addr_offset
= 0)
1324 struct iv_use
*use
= XCNEW (struct iv_use
);
1326 use
->id
= n_iv_uses (data
);
1328 use
->type
= use_type
;
1332 use
->related_cands
= BITMAP_ALLOC (NULL
);
1334 use
->addr_base
= addr_base
;
1335 use
->addr_offset
= addr_offset
;
1337 data
->iv_uses
.safe_push (use
);
1342 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1343 The sub use is recorded under the one whose use id is ID_GROUP. */
1345 static struct iv_use
*
1346 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1347 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
,
1348 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1349 unsigned int id_group
)
1351 struct iv_use
*use
= XCNEW (struct iv_use
);
1352 struct iv_use
*group
= iv_use (data
, id_group
);
1354 use
->id
= group
->id
;
1356 use
->type
= use_type
;
1360 use
->related_cands
= NULL
;
1361 use
->addr_base
= addr_base
;
1362 use
->addr_offset
= addr_offset
;
1364 /* Sub use list is maintained in offset ascending order. */
1365 if (addr_offset
<= group
->addr_offset
)
1367 use
->related_cands
= group
->related_cands
;
1368 group
->related_cands
= NULL
;
1370 data
->iv_uses
[id_group
] = use
;
1378 group
= group
->next
;
1380 while (group
&& addr_offset
> group
->addr_offset
);
1381 use
->next
= pre
->next
;
1388 /* Checks whether OP is a loop-level invariant and if so, records it.
1389 NONLINEAR_USE is true if the invariant is used in a way we do not
1390 handle specially. */
1393 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1396 struct version_info
*info
;
1398 if (TREE_CODE (op
) != SSA_NAME
1399 || virtual_operand_p (op
))
1402 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1404 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1407 info
= name_info (data
, op
);
1409 info
->has_nonlin_use
|= nonlinear_use
;
1411 info
->inv_id
= ++data
->max_inv_id
;
1412 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1415 /* Checks whether the use OP is interesting and if so, records it. */
1417 static struct iv_use
*
1418 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1424 if (TREE_CODE (op
) != SSA_NAME
)
1427 iv
= get_iv (data
, op
);
1431 if (iv
->have_use_for
)
1433 use
= iv_use (data
, iv
->use_id
);
1435 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1439 if (integer_zerop (iv
->step
))
1441 record_invariant (data
, op
, true);
1444 iv
->have_use_for
= true;
1446 stmt
= SSA_NAME_DEF_STMT (op
);
1447 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1448 || is_gimple_assign (stmt
));
1450 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1451 iv
->use_id
= use
->id
;
1456 /* Given a condition in statement STMT, checks whether it is a compare
1457 of an induction variable and an invariant. If this is the case,
1458 CONTROL_VAR is set to location of the iv, BOUND to the location of
1459 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1460 induction variable descriptions, and true is returned. If this is not
1461 the case, CONTROL_VAR and BOUND are set to the arguments of the
1462 condition and false is returned. */
1465 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1466 tree
**control_var
, tree
**bound
,
1467 struct iv
**iv_var
, struct iv
**iv_bound
)
1469 /* The objects returned when COND has constant operands. */
1470 static struct iv const_iv
;
1472 tree
*op0
= &zero
, *op1
= &zero
;
1473 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1476 if (gimple_code (stmt
) == GIMPLE_COND
)
1478 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1479 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1480 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1484 op0
= gimple_assign_rhs1_ptr (stmt
);
1485 op1
= gimple_assign_rhs2_ptr (stmt
);
1488 zero
= integer_zero_node
;
1489 const_iv
.step
= integer_zero_node
;
1491 if (TREE_CODE (*op0
) == SSA_NAME
)
1492 iv0
= get_iv (data
, *op0
);
1493 if (TREE_CODE (*op1
) == SSA_NAME
)
1494 iv1
= get_iv (data
, *op1
);
1496 /* Exactly one of the compared values must be an iv, and the other one must
1501 if (integer_zerop (iv0
->step
))
1503 /* Control variable may be on the other side. */
1504 std::swap (op0
, op1
);
1505 std::swap (iv0
, iv1
);
1507 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1522 /* Checks whether the condition in STMT is interesting and if so,
1526 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1528 tree
*var_p
, *bound_p
;
1531 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1533 find_interesting_uses_op (data
, *var_p
);
1534 find_interesting_uses_op (data
, *bound_p
);
1538 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1541 /* Returns the outermost loop EXPR is obviously invariant in
1542 relative to the loop LOOP, i.e. if all its operands are defined
1543 outside of the returned loop. Returns NULL if EXPR is not
1544 even obviously invariant in LOOP. */
1547 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1552 if (is_gimple_min_invariant (expr
))
1553 return current_loops
->tree_root
;
1555 if (TREE_CODE (expr
) == SSA_NAME
)
1557 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1560 if (flow_bb_inside_loop_p (loop
, def_bb
))
1562 return superloop_at_depth (loop
,
1563 loop_depth (def_bb
->loop_father
) + 1);
1566 return current_loops
->tree_root
;
1572 unsigned maxdepth
= 0;
1573 len
= TREE_OPERAND_LENGTH (expr
);
1574 for (i
= 0; i
< len
; i
++)
1576 struct loop
*ivloop
;
1577 if (!TREE_OPERAND (expr
, i
))
1580 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1583 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1586 return superloop_at_depth (loop
, maxdepth
);
1589 /* Returns true if expression EXPR is obviously invariant in LOOP,
1590 i.e. if all its operands are defined outside of the LOOP. LOOP
1591 should not be the function body. */
1594 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1599 gcc_assert (loop_depth (loop
) > 0);
1601 if (is_gimple_min_invariant (expr
))
1604 if (TREE_CODE (expr
) == SSA_NAME
)
1606 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1608 && flow_bb_inside_loop_p (loop
, def_bb
))
1617 len
= TREE_OPERAND_LENGTH (expr
);
1618 for (i
= 0; i
< len
; i
++)
1619 if (TREE_OPERAND (expr
, i
)
1620 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1626 /* Given expression EXPR which computes inductive values with respect
1627 to loop recorded in DATA, this function returns biv from which EXPR
1628 is derived by tracing definition chains of ssa variables in EXPR. */
1631 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1636 enum tree_code code
;
1639 if (expr
== NULL_TREE
)
1642 if (is_gimple_min_invariant (expr
))
1645 code
= TREE_CODE (expr
);
1646 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1648 n
= TREE_OPERAND_LENGTH (expr
);
1649 for (i
= 0; i
< n
; i
++)
1651 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1657 /* Stop if it's not ssa name. */
1658 if (code
!= SSA_NAME
)
1661 iv
= get_iv (data
, expr
);
1662 if (!iv
|| integer_zerop (iv
->step
))
1667 stmt
= SSA_NAME_DEF_STMT (expr
);
1668 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1671 use_operand_p use_p
;
1673 if (virtual_operand_p (gimple_phi_result (phi
)))
1676 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1678 tree use
= USE_FROM_PTR (use_p
);
1679 iv
= find_deriving_biv_for_expr (data
, use
);
1685 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1688 e1
= gimple_assign_rhs1 (stmt
);
1689 code
= gimple_assign_rhs_code (stmt
);
1690 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1691 return find_deriving_biv_for_expr (data
, e1
);
1698 case POINTER_PLUS_EXPR
:
1699 /* Increments, decrements and multiplications by a constant
1701 e2
= gimple_assign_rhs2 (stmt
);
1702 iv
= find_deriving_biv_for_expr (data
, e2
);
1708 /* Casts are simple. */
1709 return find_deriving_biv_for_expr (data
, e1
);
1718 /* Record BIV, its predecessor and successor that they are used in
1719 address type uses. */
1722 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1725 tree type
, base_1
, base_2
;
1728 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1729 || biv
->have_address_use
|| !biv
->no_overflow
)
1732 type
= TREE_TYPE (biv
->base
);
1733 if (!INTEGRAL_TYPE_P (type
))
1736 biv
->have_address_use
= true;
1737 data
->bivs_not_used_in_addr
--;
1738 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1739 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1741 struct iv
*iv
= ver_info (data
, i
)->iv
;
1743 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1744 || iv
->have_address_use
|| !iv
->no_overflow
)
1747 if (type
!= TREE_TYPE (iv
->base
)
1748 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1751 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1754 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
1755 if (operand_equal_p (base_1
, iv
->base
, 0)
1756 || operand_equal_p (base_2
, biv
->base
, 0))
1758 iv
->have_address_use
= true;
1759 data
->bivs_not_used_in_addr
--;
1764 /* Cumulates the steps of indices into DATA and replaces their values with the
1765 initial ones. Returns false when the value of the index cannot be determined.
1766 Callback for for_each_index. */
1768 struct ifs_ivopts_data
1770 struct ivopts_data
*ivopts_data
;
1776 idx_find_step (tree base
, tree
*idx
, void *data
)
1778 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1780 bool use_overflow_semantics
= false;
1781 tree step
, iv_base
, iv_step
, lbound
, off
;
1782 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1784 /* If base is a component ref, require that the offset of the reference
1786 if (TREE_CODE (base
) == COMPONENT_REF
)
1788 off
= component_ref_field_offset (base
);
1789 return expr_invariant_in_loop_p (loop
, off
);
1792 /* If base is array, first check whether we will be able to move the
1793 reference out of the loop (in order to take its address in strength
1794 reduction). In order for this to work we need both lower bound
1795 and step to be loop invariants. */
1796 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1798 /* Moreover, for a range, the size needs to be invariant as well. */
1799 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1800 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1803 step
= array_ref_element_size (base
);
1804 lbound
= array_ref_low_bound (base
);
1806 if (!expr_invariant_in_loop_p (loop
, step
)
1807 || !expr_invariant_in_loop_p (loop
, lbound
))
1811 if (TREE_CODE (*idx
) != SSA_NAME
)
1814 iv
= get_iv (dta
->ivopts_data
, *idx
);
1818 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1819 *&x[0], which is not folded and does not trigger the
1820 ARRAY_REF path below. */
1823 if (integer_zerop (iv
->step
))
1826 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1828 step
= array_ref_element_size (base
);
1830 /* We only handle addresses whose step is an integer constant. */
1831 if (TREE_CODE (step
) != INTEGER_CST
)
1835 /* The step for pointer arithmetics already is 1 byte. */
1836 step
= size_one_node
;
1840 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1841 use_overflow_semantics
= true;
1843 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1844 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1845 use_overflow_semantics
))
1847 /* The index might wrap. */
1851 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1852 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1854 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
1857 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
1859 record_biv_for_address_use (dta
->ivopts_data
, iv
);
1864 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1865 object is passed to it in DATA. */
1868 idx_record_use (tree base
, tree
*idx
,
1871 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1872 find_interesting_uses_op (data
, *idx
);
1873 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1875 find_interesting_uses_op (data
, array_ref_element_size (base
));
1876 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1881 /* If we can prove that TOP = cst * BOT for some constant cst,
1882 store cst to MUL and return true. Otherwise return false.
1883 The returned value is always sign-extended, regardless of the
1884 signedness of TOP and BOT. */
1887 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1890 enum tree_code code
;
1891 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1892 widest_int res
, p0
, p1
;
1897 if (operand_equal_p (top
, bot
, 0))
1903 code
= TREE_CODE (top
);
1907 mby
= TREE_OPERAND (top
, 1);
1908 if (TREE_CODE (mby
) != INTEGER_CST
)
1911 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1914 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1919 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1920 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1923 if (code
== MINUS_EXPR
)
1925 *mul
= wi::sext (p0
+ p1
, precision
);
1929 if (TREE_CODE (bot
) != INTEGER_CST
)
1932 p0
= widest_int::from (top
, SIGNED
);
1933 p1
= widest_int::from (bot
, SIGNED
);
1936 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1944 /* Return true if memory reference REF with step STEP may be unaligned. */
1947 may_be_unaligned_p (tree ref
, tree step
)
1949 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1950 thus they are not misaligned. */
1951 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1954 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1955 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1956 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1958 unsigned HOST_WIDE_INT bitpos
;
1959 unsigned int ref_align
;
1960 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1961 if (ref_align
< align
1962 || (bitpos
% align
) != 0
1963 || (bitpos
% BITS_PER_UNIT
) != 0)
1966 unsigned int trailing_zeros
= tree_ctz (step
);
1967 if (trailing_zeros
< HOST_BITS_PER_INT
1968 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1974 /* Return true if EXPR may be non-addressable. */
1977 may_be_nonaddressable_p (tree expr
)
1979 switch (TREE_CODE (expr
))
1981 case TARGET_MEM_REF
:
1982 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1983 target, thus they are always addressable. */
1987 /* Likewise for MEM_REFs, modulo the storage order. */
1988 return REF_REVERSE_STORAGE_ORDER (expr
);
1991 if (REF_REVERSE_STORAGE_ORDER (expr
))
1993 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1996 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
1998 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1999 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2002 case ARRAY_RANGE_REF
:
2003 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2005 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2007 case VIEW_CONVERT_EXPR
:
2008 /* This kind of view-conversions may wrap non-addressable objects
2009 and make them look addressable. After some processing the
2010 non-addressability may be uncovered again, causing ADDR_EXPRs
2011 of inappropriate objects to be built. */
2012 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2013 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2015 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2028 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
2030 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2031 If there is an existing use which has same stripped iv base and step,
2032 this function records this one as a sub use to that; otherwise records
2033 it as a normal one. */
2035 static struct iv_use
*
2036 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
2037 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
)
2042 unsigned HOST_WIDE_INT addr_offset
;
2044 /* Only support sub use for address type uses, that is, with base
2046 if (!iv
->base_object
)
2047 return record_use (data
, use_p
, iv
, stmt
, use_type
);
2049 addr_base
= strip_offset (iv
->base
, &addr_offset
);
2050 for (i
= 0; i
< n_iv_uses (data
); i
++)
2052 use
= iv_use (data
, i
);
2053 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
2056 /* Check if it has the same stripped base and step. */
2057 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
2058 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
2059 && operand_equal_p (addr_base
, use
->addr_base
, 0))
2063 if (i
== n_iv_uses (data
))
2064 return record_use (data
, use_p
, iv
, stmt
,
2065 use_type
, addr_base
, addr_offset
);
2067 return record_sub_use (data
, use_p
, iv
, stmt
,
2068 use_type
, addr_base
, addr_offset
, i
);
2071 /* Finds addresses in *OP_P inside STMT. */
2074 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2077 tree base
= *op_p
, step
= size_zero_node
;
2079 struct ifs_ivopts_data ifs_ivopts_data
;
2081 /* Do not play with volatile memory references. A bit too conservative,
2082 perhaps, but safe. */
2083 if (gimple_has_volatile_ops (stmt
))
2086 /* Ignore bitfields for now. Not really something terribly complicated
2088 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2091 base
= unshare_expr (base
);
2093 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2095 tree type
= build_pointer_type (TREE_TYPE (base
));
2099 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2101 civ
= get_iv (data
, TMR_BASE (base
));
2105 TMR_BASE (base
) = civ
->base
;
2108 if (TMR_INDEX2 (base
)
2109 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2111 civ
= get_iv (data
, TMR_INDEX2 (base
));
2115 TMR_INDEX2 (base
) = civ
->base
;
2118 if (TMR_INDEX (base
)
2119 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2121 civ
= get_iv (data
, TMR_INDEX (base
));
2125 TMR_INDEX (base
) = civ
->base
;
2130 if (TMR_STEP (base
))
2131 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2133 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2137 if (integer_zerop (step
))
2139 base
= tree_mem_ref_addr (type
, base
);
2143 ifs_ivopts_data
.ivopts_data
= data
;
2144 ifs_ivopts_data
.stmt
= stmt
;
2145 ifs_ivopts_data
.step
= size_zero_node
;
2146 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2147 || integer_zerop (ifs_ivopts_data
.step
))
2149 step
= ifs_ivopts_data
.step
;
2151 /* Check that the base expression is addressable. This needs
2152 to be done after substituting bases of IVs into it. */
2153 if (may_be_nonaddressable_p (base
))
2156 /* Moreover, on strict alignment platforms, check that it is
2157 sufficiently aligned. */
2158 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2161 base
= build_fold_addr_expr (base
);
2163 /* Substituting bases of IVs into the base expression might
2164 have caused folding opportunities. */
2165 if (TREE_CODE (base
) == ADDR_EXPR
)
2167 tree
*ref
= &TREE_OPERAND (base
, 0);
2168 while (handled_component_p (*ref
))
2169 ref
= &TREE_OPERAND (*ref
, 0);
2170 if (TREE_CODE (*ref
) == MEM_REF
)
2172 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2173 TREE_OPERAND (*ref
, 0),
2174 TREE_OPERAND (*ref
, 1));
2181 civ
= alloc_iv (data
, base
, step
);
2182 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2186 for_each_index (op_p
, idx_record_use
, data
);
2189 /* Finds and records invariants used in STMT. */
2192 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2195 use_operand_p use_p
;
2198 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2200 op
= USE_FROM_PTR (use_p
);
2201 record_invariant (data
, op
, false);
2205 /* Finds interesting uses of induction variables in the statement STMT. */
2208 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2211 tree op
, *lhs
, *rhs
;
2213 use_operand_p use_p
;
2214 enum tree_code code
;
2216 find_invariants_stmt (data
, stmt
);
2218 if (gimple_code (stmt
) == GIMPLE_COND
)
2220 find_interesting_uses_cond (data
, stmt
);
2224 if (is_gimple_assign (stmt
))
2226 lhs
= gimple_assign_lhs_ptr (stmt
);
2227 rhs
= gimple_assign_rhs1_ptr (stmt
);
2229 if (TREE_CODE (*lhs
) == SSA_NAME
)
2231 /* If the statement defines an induction variable, the uses are not
2232 interesting by themselves. */
2234 iv
= get_iv (data
, *lhs
);
2236 if (iv
&& !integer_zerop (iv
->step
))
2240 code
= gimple_assign_rhs_code (stmt
);
2241 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2242 && (REFERENCE_CLASS_P (*rhs
)
2243 || is_gimple_val (*rhs
)))
2245 if (REFERENCE_CLASS_P (*rhs
))
2246 find_interesting_uses_address (data
, stmt
, rhs
);
2248 find_interesting_uses_op (data
, *rhs
);
2250 if (REFERENCE_CLASS_P (*lhs
))
2251 find_interesting_uses_address (data
, stmt
, lhs
);
2254 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2256 find_interesting_uses_cond (data
, stmt
);
2260 /* TODO -- we should also handle address uses of type
2262 memory = call (whatever);
2269 if (gimple_code (stmt
) == GIMPLE_PHI
2270 && gimple_bb (stmt
) == data
->current_loop
->header
)
2272 iv
= get_iv (data
, PHI_RESULT (stmt
));
2274 if (iv
&& !integer_zerop (iv
->step
))
2278 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2280 op
= USE_FROM_PTR (use_p
);
2282 if (TREE_CODE (op
) != SSA_NAME
)
2285 iv
= get_iv (data
, op
);
2289 find_interesting_uses_op (data
, op
);
2293 /* Finds interesting uses of induction variables outside of loops
2294 on loop exit edge EXIT. */
2297 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2303 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2306 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2307 if (!virtual_operand_p (def
))
2308 find_interesting_uses_op (data
, def
);
2312 /* Finds uses of the induction variables that are interesting. */
2315 find_interesting_uses (struct ivopts_data
*data
)
2318 gimple_stmt_iterator bsi
;
2319 basic_block
*body
= get_loop_body (data
->current_loop
);
2321 struct version_info
*info
;
2324 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2325 fprintf (dump_file
, "Uses:\n\n");
2327 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2332 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2333 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2334 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2335 find_interesting_uses_outside (data
, e
);
2337 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2338 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2339 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2340 if (!is_gimple_debug (gsi_stmt (bsi
)))
2341 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2348 fprintf (dump_file
, "\n");
2350 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2352 info
= ver_info (data
, i
);
2355 fprintf (dump_file
, " ");
2356 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2357 fprintf (dump_file
, " is invariant (%d)%s\n",
2358 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2362 fprintf (dump_file
, "\n");
2368 /* Compute maximum offset of [base + offset] addressing mode
2369 for memory reference represented by USE. */
2371 static HOST_WIDE_INT
2372 compute_max_addr_offset (struct iv_use
*use
)
2376 HOST_WIDE_INT i
, off
;
2377 unsigned list_index
, num
;
2379 machine_mode mem_mode
, addr_mode
;
2380 static vec
<HOST_WIDE_INT
> max_offset_list
;
2382 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2383 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2385 num
= max_offset_list
.length ();
2386 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2387 if (list_index
>= num
)
2389 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2390 for (; num
< max_offset_list
.length (); num
++)
2391 max_offset_list
[num
] = -1;
2394 off
= max_offset_list
[list_index
];
2398 addr_mode
= targetm
.addr_space
.address_mode (as
);
2399 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2400 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2402 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2403 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2404 width
= HOST_BITS_PER_WIDE_INT
- 1;
2406 for (i
= width
; i
> 0; i
--)
2408 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2409 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2410 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2413 /* For some strict-alignment targets, the offset must be naturally
2414 aligned. Try an aligned offset if mem_mode is not QImode. */
2415 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2416 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2418 off
-= GET_MODE_SIZE (mem_mode
);
2419 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2420 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2427 max_offset_list
[list_index
] = off
;
2431 /* Check if all small groups should be split. Return true if and
2434 1) At least one groups contain two uses with different offsets.
2435 2) No group contains more than two uses with different offsets.
2437 Return false otherwise. We want to split such groups because:
2439 1) Small groups don't have much benefit and may interfer with
2440 general candidate selection.
2441 2) Size for problem with only small groups is usually small and
2442 general algorithm can handle it well.
2444 TODO -- Above claim may not hold when auto increment is supported. */
2447 split_all_small_groups (struct ivopts_data
*data
)
2449 bool split_p
= false;
2450 unsigned int i
, n
, distinct
;
2451 struct iv_use
*pre
, *use
;
2453 n
= n_iv_uses (data
);
2454 for (i
= 0; i
< n
; i
++)
2456 use
= iv_use (data
, i
);
2461 gcc_assert (use
->type
== USE_ADDRESS
);
2462 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2464 if (pre
->addr_offset
!= use
->addr_offset
)
2477 /* For each group of address type uses, this function further groups
2478 these uses according to the maximum offset supported by target's
2479 [base + offset] addressing mode. */
2482 group_address_uses (struct ivopts_data
*data
)
2484 HOST_WIDE_INT max_offset
= -1;
2485 unsigned int i
, n
, sub_id
;
2486 struct iv_use
*pre
, *use
;
2487 unsigned HOST_WIDE_INT addr_offset_first
;
2489 /* Reset max offset to split all small groups. */
2490 if (split_all_small_groups (data
))
2493 n
= n_iv_uses (data
);
2494 for (i
= 0; i
< n
; i
++)
2496 use
= iv_use (data
, i
);
2500 gcc_assert (use
->type
== USE_ADDRESS
);
2501 if (max_offset
!= 0)
2502 max_offset
= compute_max_addr_offset (use
);
2507 addr_offset_first
= use
->addr_offset
;
2508 /* Only uses with offset that can fit in offset part against
2509 the first use can be grouped together. */
2510 for (pre
= use
, use
= use
->next
;
2511 use
&& (use
->addr_offset
- addr_offset_first
2512 <= (unsigned HOST_WIDE_INT
) max_offset
);
2513 pre
= use
, use
= use
->next
)
2516 use
->sub_id
= ++sub_id
;
2519 /* Break the list and create new group. */
2523 use
->id
= n_iv_uses (data
);
2524 use
->related_cands
= BITMAP_ALLOC (NULL
);
2525 data
->iv_uses
.safe_push (use
);
2530 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2531 dump_uses (dump_file
, data
);
2534 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2535 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2536 we are at the top-level of the processed address. */
2539 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2540 HOST_WIDE_INT
*offset
)
2542 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2543 enum tree_code code
;
2544 tree type
, orig_type
= TREE_TYPE (expr
);
2545 HOST_WIDE_INT off0
, off1
, st
;
2546 tree orig_expr
= expr
;
2550 type
= TREE_TYPE (expr
);
2551 code
= TREE_CODE (expr
);
2557 if (!cst_and_fits_in_hwi (expr
)
2558 || integer_zerop (expr
))
2561 *offset
= int_cst_value (expr
);
2562 return build_int_cst (orig_type
, 0);
2564 case POINTER_PLUS_EXPR
:
2567 op0
= TREE_OPERAND (expr
, 0);
2568 op1
= TREE_OPERAND (expr
, 1);
2570 op0
= strip_offset_1 (op0
, false, false, &off0
);
2571 op1
= strip_offset_1 (op1
, false, false, &off1
);
2573 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2574 if (op0
== TREE_OPERAND (expr
, 0)
2575 && op1
== TREE_OPERAND (expr
, 1))
2578 if (integer_zerop (op1
))
2580 else if (integer_zerop (op0
))
2582 if (code
== MINUS_EXPR
)
2583 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2588 expr
= fold_build2 (code
, type
, op0
, op1
);
2590 return fold_convert (orig_type
, expr
);
2593 op1
= TREE_OPERAND (expr
, 1);
2594 if (!cst_and_fits_in_hwi (op1
))
2597 op0
= TREE_OPERAND (expr
, 0);
2598 op0
= strip_offset_1 (op0
, false, false, &off0
);
2599 if (op0
== TREE_OPERAND (expr
, 0))
2602 *offset
= off0
* int_cst_value (op1
);
2603 if (integer_zerop (op0
))
2606 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2608 return fold_convert (orig_type
, expr
);
2611 case ARRAY_RANGE_REF
:
2615 step
= array_ref_element_size (expr
);
2616 if (!cst_and_fits_in_hwi (step
))
2619 st
= int_cst_value (step
);
2620 op1
= TREE_OPERAND (expr
, 1);
2621 op1
= strip_offset_1 (op1
, false, false, &off1
);
2622 *offset
= off1
* st
;
2625 && integer_zerop (op1
))
2627 /* Strip the component reference completely. */
2628 op0
= TREE_OPERAND (expr
, 0);
2629 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2642 tmp
= component_ref_field_offset (expr
);
2643 field
= TREE_OPERAND (expr
, 1);
2645 && cst_and_fits_in_hwi (tmp
)
2646 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2648 HOST_WIDE_INT boffset
, abs_off
;
2650 /* Strip the component reference completely. */
2651 op0
= TREE_OPERAND (expr
, 0);
2652 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2653 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2654 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2658 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2665 op0
= TREE_OPERAND (expr
, 0);
2666 op0
= strip_offset_1 (op0
, true, true, &off0
);
2669 if (op0
== TREE_OPERAND (expr
, 0))
2672 expr
= build_fold_addr_expr (op0
);
2673 return fold_convert (orig_type
, expr
);
2676 /* ??? Offset operand? */
2677 inside_addr
= false;
2684 /* Default handling of expressions for that we want to recurse into
2685 the first operand. */
2686 op0
= TREE_OPERAND (expr
, 0);
2687 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2690 if (op0
== TREE_OPERAND (expr
, 0)
2691 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2694 expr
= copy_node (expr
);
2695 TREE_OPERAND (expr
, 0) = op0
;
2697 TREE_OPERAND (expr
, 1) = op1
;
2699 /* Inside address, we might strip the top level component references,
2700 thus changing type of the expression. Handling of ADDR_EXPR
2702 expr
= fold_convert (orig_type
, expr
);
2707 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2710 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2713 tree core
= strip_offset_1 (expr
, false, false, &off
);
2718 /* Returns variant of TYPE that can be used as base for different uses.
2719 We return unsigned type with the same precision, which avoids problems
2723 generic_type_for (tree type
)
2725 if (POINTER_TYPE_P (type
))
2726 return unsigned_type_for (type
);
2728 if (TYPE_UNSIGNED (type
))
2731 return unsigned_type_for (type
);
2734 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2735 the bitmap to that we should store it. */
2737 static struct ivopts_data
*fd_ivopts_data
;
2739 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2741 bitmap
*depends_on
= (bitmap
*) data
;
2742 struct version_info
*info
;
2744 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2746 info
= name_info (fd_ivopts_data
, *expr_p
);
2748 if (!info
->inv_id
|| info
->has_nonlin_use
)
2752 *depends_on
= BITMAP_ALLOC (NULL
);
2753 bitmap_set_bit (*depends_on
, info
->inv_id
);
2758 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2759 position to POS. If USE is not NULL, the candidate is set as related to
2760 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2761 replacement of the final value of the iv by a direct computation. */
2763 static struct iv_cand
*
2764 add_candidate_1 (struct ivopts_data
*data
,
2765 tree base
, tree step
, bool important
, enum iv_position pos
,
2766 struct iv_use
*use
, gimple
*incremented_at
,
2767 struct iv
*orig_iv
= NULL
)
2770 struct iv_cand
*cand
= NULL
;
2771 tree type
, orig_type
;
2773 /* For non-original variables, make sure their values are computed in a type
2774 that does not invoke undefined behavior on overflows (since in general,
2775 we cannot prove that these induction variables are non-wrapping). */
2776 if (pos
!= IP_ORIGINAL
)
2778 orig_type
= TREE_TYPE (base
);
2779 type
= generic_type_for (orig_type
);
2780 if (type
!= orig_type
)
2782 base
= fold_convert (type
, base
);
2783 step
= fold_convert (type
, step
);
2787 for (i
= 0; i
< n_iv_cands (data
); i
++)
2789 cand
= iv_cand (data
, i
);
2791 if (cand
->pos
!= pos
)
2794 if (cand
->incremented_at
!= incremented_at
2795 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2796 && cand
->ainc_use
!= use
))
2810 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2811 && operand_equal_p (step
, cand
->iv
->step
, 0)
2812 && (TYPE_PRECISION (TREE_TYPE (base
))
2813 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2817 if (i
== n_iv_cands (data
))
2819 cand
= XCNEW (struct iv_cand
);
2825 cand
->iv
= alloc_iv (data
, base
, step
);
2828 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2830 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2831 cand
->var_after
= cand
->var_before
;
2833 cand
->important
= important
;
2834 cand
->incremented_at
= incremented_at
;
2835 data
->iv_candidates
.safe_push (cand
);
2838 && TREE_CODE (step
) != INTEGER_CST
)
2840 fd_ivopts_data
= data
;
2841 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2844 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2845 cand
->ainc_use
= use
;
2847 cand
->ainc_use
= NULL
;
2849 cand
->orig_iv
= orig_iv
;
2850 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2851 dump_cand (dump_file
, cand
);
2854 if (important
&& !cand
->important
)
2856 cand
->important
= true;
2857 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2858 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2863 bitmap_set_bit (use
->related_cands
, i
);
2864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2865 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2872 /* Returns true if incrementing the induction variable at the end of the LOOP
2875 The purpose is to avoid splitting latch edge with a biv increment, thus
2876 creating a jump, possibly confusing other optimization passes and leaving
2877 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2878 is not available (so we do not have a better alternative), or if the latch
2879 edge is already nonempty. */
2882 allow_ip_end_pos_p (struct loop
*loop
)
2884 if (!ip_normal_pos (loop
))
2887 if (!empty_block_p (ip_end_pos (loop
)))
2893 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2894 Important field is set to IMPORTANT. */
2897 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2898 bool important
, struct iv_use
*use
)
2900 basic_block use_bb
= gimple_bb (use
->stmt
);
2901 machine_mode mem_mode
;
2902 unsigned HOST_WIDE_INT cstepi
;
2904 /* If we insert the increment in any position other than the standard
2905 ones, we must ensure that it is incremented once per iteration.
2906 It must not be in an inner nested loop, or one side of an if
2908 if (use_bb
->loop_father
!= data
->current_loop
2909 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2910 || stmt_could_throw_p (use
->stmt
)
2911 || !cst_and_fits_in_hwi (step
))
2914 cstepi
= int_cst_value (step
);
2916 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2917 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2918 || USE_STORE_PRE_INCREMENT (mem_mode
))
2919 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2920 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2921 || USE_STORE_PRE_DECREMENT (mem_mode
))
2922 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2924 enum tree_code code
= MINUS_EXPR
;
2926 tree new_step
= step
;
2928 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2930 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2931 code
= POINTER_PLUS_EXPR
;
2934 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2935 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2936 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2939 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2940 || USE_STORE_POST_INCREMENT (mem_mode
))
2941 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2942 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2943 || USE_STORE_POST_DECREMENT (mem_mode
))
2944 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2946 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2951 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2952 position to POS. If USE is not NULL, the candidate is set as related to
2953 it. The candidate computation is scheduled before exit condition and at
2957 add_candidate (struct ivopts_data
*data
,
2958 tree base
, tree step
, bool important
, struct iv_use
*use
,
2959 struct iv
*orig_iv
= NULL
)
2961 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2963 if (ip_normal_pos (data
->current_loop
))
2964 add_candidate_1 (data
, base
, step
, important
,
2965 IP_NORMAL
, use
, NULL
, orig_iv
);
2966 if (ip_end_pos (data
->current_loop
)
2967 && allow_ip_end_pos_p (data
->current_loop
))
2968 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
2971 /* Adds standard iv candidates. */
2974 add_standard_iv_candidates (struct ivopts_data
*data
)
2976 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2978 /* The same for a double-integer type if it is still fast enough. */
2980 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2981 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2982 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2983 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2985 /* The same for a double-integer type if it is still fast enough. */
2987 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2988 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2989 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2990 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2994 /* Adds candidates bases on the old induction variable IV. */
2997 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3001 struct iv_cand
*cand
;
3003 /* Check if this biv is used in address type use. */
3004 if (iv
->no_overflow
&& iv
->have_address_use
3005 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3006 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3008 tree base
= fold_convert (sizetype
, iv
->base
);
3009 tree step
= fold_convert (sizetype
, iv
->step
);
3011 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3012 add_candidate (data
, base
, step
, true, NULL
, iv
);
3013 /* Add iv cand of the original type only if it has nonlinear use. */
3014 if (iv
->have_use_for
)
3015 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3018 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3020 /* The same, but with initial value zero. */
3021 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3022 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3024 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3025 iv
->step
, true, NULL
);
3027 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3028 if (gimple_code (phi
) == GIMPLE_PHI
)
3030 /* Additionally record the possibility of leaving the original iv
3032 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3033 /* Don't add candidate if it's from another PHI node because
3034 it's an affine iv appearing in the form of PEELED_CHREC. */
3035 phi
= SSA_NAME_DEF_STMT (def
);
3036 if (gimple_code (phi
) != GIMPLE_PHI
)
3038 cand
= add_candidate_1 (data
,
3039 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3040 SSA_NAME_DEF_STMT (def
));
3041 cand
->var_before
= iv
->ssa_name
;
3042 cand
->var_after
= def
;
3045 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3049 /* Adds candidates based on the old induction variables. */
3052 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3058 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3060 iv
= ver_info (data
, i
)->iv
;
3061 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3062 add_iv_candidate_for_biv (data
, iv
);
3066 /* Adds candidates based on the value of USE's iv. */
3069 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3071 unsigned HOST_WIDE_INT offset
;
3074 struct iv
*iv
= use
->iv
;
3076 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3078 /* The same, but with initial value zero. Make such variable important,
3079 since it is generic enough so that possibly many uses may be based
3081 basetype
= TREE_TYPE (iv
->base
);
3082 if (POINTER_TYPE_P (basetype
))
3083 basetype
= sizetype
;
3084 add_candidate (data
, build_int_cst (basetype
, 0), iv
->step
, true, use
);
3086 /* Third, try removing the constant offset. Make sure to even
3087 add a candidate for &a[0] vs. (T *)&a. */
3088 base
= strip_offset (iv
->base
, &offset
);
3089 if (offset
|| base
!= iv
->base
)
3090 add_candidate (data
, base
, iv
->step
, false, use
);
3092 /* At last, add auto-incremental candidates. Make such variables
3093 important since other iv uses with same base object may be based
3095 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
3096 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3099 /* Adds candidates based on the uses. */
3102 add_iv_candidate_for_uses (struct ivopts_data
*data
)
3106 for (i
= 0; i
< n_iv_uses (data
); i
++)
3108 struct iv_use
*use
= iv_use (data
, i
);
3115 case USE_NONLINEAR_EXPR
:
3118 /* Just add the ivs based on the value of the iv used here. */
3119 add_iv_candidate_for_use (data
, use
);
3128 /* Record important candidates and add them to related_cands bitmaps
3132 record_important_candidates (struct ivopts_data
*data
)
3137 for (i
= 0; i
< n_iv_cands (data
); i
++)
3139 struct iv_cand
*cand
= iv_cand (data
, i
);
3141 if (cand
->important
)
3142 bitmap_set_bit (data
->important_candidates
, i
);
3145 data
->consider_all_candidates
= (n_iv_cands (data
)
3146 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3148 if (data
->consider_all_candidates
)
3150 /* We will not need "related_cands" bitmaps in this case,
3151 so release them to decrease peak memory consumption. */
3152 for (i
= 0; i
< n_iv_uses (data
); i
++)
3154 use
= iv_use (data
, i
);
3155 BITMAP_FREE (use
->related_cands
);
3160 /* Add important candidates to the related_cands bitmaps. */
3161 for (i
= 0; i
< n_iv_uses (data
); i
++)
3162 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
3163 data
->important_candidates
);
3167 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3168 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3169 we allocate a simple list to every use. */
3172 alloc_use_cost_map (struct ivopts_data
*data
)
3174 unsigned i
, size
, s
;
3176 for (i
= 0; i
< n_iv_uses (data
); i
++)
3178 struct iv_use
*use
= iv_use (data
, i
);
3180 if (data
->consider_all_candidates
)
3181 size
= n_iv_cands (data
);
3184 s
= bitmap_count_bits (use
->related_cands
);
3186 /* Round up to the power of two, so that moduling by it is fast. */
3187 size
= s
? (1 << ceil_log2 (s
)) : 1;
3190 use
->n_map_members
= size
;
3191 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3195 /* Returns description of computation cost of expression whose runtime
3196 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3199 new_cost (unsigned runtime
, unsigned complexity
)
3203 cost
.cost
= runtime
;
3204 cost
.complexity
= complexity
;
3209 /* Returns true if COST is infinite. */
3212 infinite_cost_p (comp_cost cost
)
3214 return cost
.cost
== INFTY
;
3217 /* Adds costs COST1 and COST2. */
3220 add_costs (comp_cost cost1
, comp_cost cost2
)
3222 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3223 return infinite_cost
;
3225 cost1
.cost
+= cost2
.cost
;
3226 cost1
.complexity
+= cost2
.complexity
;
3230 /* Subtracts costs COST1 and COST2. */
3233 sub_costs (comp_cost cost1
, comp_cost cost2
)
3235 cost1
.cost
-= cost2
.cost
;
3236 cost1
.complexity
-= cost2
.complexity
;
3241 /* Returns a negative number if COST1 < COST2, a positive number if
3242 COST1 > COST2, and 0 if COST1 = COST2. */
3245 compare_costs (comp_cost cost1
, comp_cost cost2
)
3247 if (cost1
.cost
== cost2
.cost
)
3248 return cost1
.complexity
- cost2
.complexity
;
3250 return cost1
.cost
- cost2
.cost
;
3253 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3254 on invariants DEPENDS_ON and that the value used in expressing it
3255 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3258 set_use_iv_cost (struct ivopts_data
*data
,
3259 struct iv_use
*use
, struct iv_cand
*cand
,
3260 comp_cost cost
, bitmap depends_on
, tree value
,
3261 enum tree_code comp
, int inv_expr_id
)
3265 if (infinite_cost_p (cost
))
3267 BITMAP_FREE (depends_on
);
3271 if (data
->consider_all_candidates
)
3273 use
->cost_map
[cand
->id
].cand
= cand
;
3274 use
->cost_map
[cand
->id
].cost
= cost
;
3275 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3276 use
->cost_map
[cand
->id
].value
= value
;
3277 use
->cost_map
[cand
->id
].comp
= comp
;
3278 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3282 /* n_map_members is a power of two, so this computes modulo. */
3283 s
= cand
->id
& (use
->n_map_members
- 1);
3284 for (i
= s
; i
< use
->n_map_members
; i
++)
3285 if (!use
->cost_map
[i
].cand
)
3287 for (i
= 0; i
< s
; i
++)
3288 if (!use
->cost_map
[i
].cand
)
3294 use
->cost_map
[i
].cand
= cand
;
3295 use
->cost_map
[i
].cost
= cost
;
3296 use
->cost_map
[i
].depends_on
= depends_on
;
3297 use
->cost_map
[i
].value
= value
;
3298 use
->cost_map
[i
].comp
= comp
;
3299 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3302 /* Gets cost of (USE, CANDIDATE) pair. */
3304 static struct cost_pair
*
3305 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3306 struct iv_cand
*cand
)
3309 struct cost_pair
*ret
;
3314 if (data
->consider_all_candidates
)
3316 ret
= use
->cost_map
+ cand
->id
;
3323 /* n_map_members is a power of two, so this computes modulo. */
3324 s
= cand
->id
& (use
->n_map_members
- 1);
3325 for (i
= s
; i
< use
->n_map_members
; i
++)
3326 if (use
->cost_map
[i
].cand
== cand
)
3327 return use
->cost_map
+ i
;
3328 else if (use
->cost_map
[i
].cand
== NULL
)
3330 for (i
= 0; i
< s
; i
++)
3331 if (use
->cost_map
[i
].cand
== cand
)
3332 return use
->cost_map
+ i
;
3333 else if (use
->cost_map
[i
].cand
== NULL
)
3339 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3341 produce_memory_decl_rtl (tree obj
, int *regno
)
3343 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3344 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3348 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3350 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3351 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3352 SET_SYMBOL_REF_DECL (x
, obj
);
3353 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3354 set_mem_addr_space (x
, as
);
3355 targetm
.encode_section_info (obj
, x
, true);
3359 x
= gen_raw_REG (address_mode
, (*regno
)++);
3360 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3361 set_mem_addr_space (x
, as
);
3367 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3368 walk_tree. DATA contains the actual fake register number. */
3371 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3373 tree obj
= NULL_TREE
;
3375 int *regno
= (int *) data
;
3377 switch (TREE_CODE (*expr_p
))
3380 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3381 handled_component_p (*expr_p
);
3382 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3385 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3386 x
= produce_memory_decl_rtl (obj
, regno
);
3391 obj
= SSA_NAME_VAR (*expr_p
);
3392 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3395 if (!DECL_RTL_SET_P (obj
))
3396 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3405 if (DECL_RTL_SET_P (obj
))
3408 if (DECL_MODE (obj
) == BLKmode
)
3409 x
= produce_memory_decl_rtl (obj
, regno
);
3411 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3421 decl_rtl_to_reset
.safe_push (obj
);
3422 SET_DECL_RTL (obj
, x
);
3428 /* Determines cost of the computation of EXPR. */
3431 computation_cost (tree expr
, bool speed
)
3435 tree type
= TREE_TYPE (expr
);
3437 /* Avoid using hard regs in ways which may be unsupported. */
3438 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3439 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3440 enum node_frequency real_frequency
= node
->frequency
;
3442 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3443 crtl
->maybe_hot_insn_p
= speed
;
3444 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3446 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3449 default_rtl_profile ();
3450 node
->frequency
= real_frequency
;
3452 cost
= seq_cost (seq
, speed
);
3454 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3455 TYPE_ADDR_SPACE (type
), speed
);
3456 else if (!REG_P (rslt
))
3457 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3462 /* Returns variable containing the value of candidate CAND at statement AT. */
3465 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3467 if (stmt_after_increment (loop
, cand
, stmt
))
3468 return cand
->var_after
;
3470 return cand
->var_before
;
3473 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3474 same precision that is at least as wide as the precision of TYPE, stores
3475 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3479 determine_common_wider_type (tree
*a
, tree
*b
)
3481 tree wider_type
= NULL
;
3483 tree atype
= TREE_TYPE (*a
);
3485 if (CONVERT_EXPR_P (*a
))
3487 suba
= TREE_OPERAND (*a
, 0);
3488 wider_type
= TREE_TYPE (suba
);
3489 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3495 if (CONVERT_EXPR_P (*b
))
3497 subb
= TREE_OPERAND (*b
, 0);
3498 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3509 /* Determines the expression by that USE is expressed from induction variable
3510 CAND at statement AT in LOOP. The expression is stored in a decomposed
3511 form into AFF. Returns false if USE cannot be expressed using CAND. */
3514 get_computation_aff (struct loop
*loop
,
3515 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
,
3516 struct aff_tree
*aff
)
3518 tree ubase
= use
->iv
->base
;
3519 tree ustep
= use
->iv
->step
;
3520 tree cbase
= cand
->iv
->base
;
3521 tree cstep
= cand
->iv
->step
, cstep_common
;
3522 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3523 tree common_type
, var
;
3525 aff_tree cbase_aff
, var_aff
;
3528 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3530 /* We do not have a precision to express the values of use. */
3534 var
= var_at_stmt (loop
, cand
, at
);
3535 uutype
= unsigned_type_for (utype
);
3537 /* If the conversion is not noop, perform it. */
3538 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3540 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3541 && (CONVERT_EXPR_P (cstep
) || TREE_CODE (cstep
) == INTEGER_CST
))
3543 tree inner_base
, inner_step
, inner_type
;
3544 inner_base
= TREE_OPERAND (cbase
, 0);
3545 if (CONVERT_EXPR_P (cstep
))
3546 inner_step
= TREE_OPERAND (cstep
, 0);
3550 inner_type
= TREE_TYPE (inner_base
);
3551 /* If candidate is added from a biv whose type is smaller than
3552 ctype, we know both candidate and the biv won't overflow.
3553 In this case, it's safe to skip the convertion in candidate.
3554 As an example, (unsigned short)((unsigned long)A) equals to
3555 (unsigned short)A, if A has a type no larger than short. */
3556 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3562 cstep
= fold_convert (uutype
, cstep
);
3563 cbase
= fold_convert (uutype
, cbase
);
3564 var
= fold_convert (uutype
, var
);
3567 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3570 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3571 type, we achieve better folding by computing their difference in this
3572 wider type, and cast the result to UUTYPE. We do not need to worry about
3573 overflows, as all the arithmetics will in the end be performed in UUTYPE
3575 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3577 /* use = ubase - ratio * cbase + ratio * var. */
3578 tree_to_aff_combination (ubase
, common_type
, aff
);
3579 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3580 tree_to_aff_combination (var
, uutype
, &var_aff
);
3582 /* We need to shift the value if we are after the increment. */
3583 if (stmt_after_increment (loop
, cand
, at
))
3587 if (common_type
!= uutype
)
3588 cstep_common
= fold_convert (common_type
, cstep
);
3590 cstep_common
= cstep
;
3592 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3593 aff_combination_add (&cbase_aff
, &cstep_aff
);
3596 aff_combination_scale (&cbase_aff
, -rat
);
3597 aff_combination_add (aff
, &cbase_aff
);
3598 if (common_type
!= uutype
)
3599 aff_combination_convert (aff
, uutype
);
3601 aff_combination_scale (&var_aff
, rat
);
3602 aff_combination_add (aff
, &var_aff
);
3607 /* Return the type of USE. */
3610 get_use_type (struct iv_use
*use
)
3612 tree base_type
= TREE_TYPE (use
->iv
->base
);
3615 if (use
->type
== USE_ADDRESS
)
3617 /* The base_type may be a void pointer. Create a pointer type based on
3618 the mem_ref instead. */
3619 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3620 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3621 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3629 /* Determines the expression by that USE is expressed from induction variable
3630 CAND at statement AT in LOOP. The computation is unshared. */
3633 get_computation_at (struct loop
*loop
,
3634 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
)
3637 tree type
= get_use_type (use
);
3639 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3641 unshare_aff_combination (&aff
);
3642 return fold_convert (type
, aff_combination_to_tree (&aff
));
3645 /* Determines the expression by that USE is expressed from induction variable
3646 CAND in LOOP. The computation is unshared. */
3649 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3651 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3654 /* Adjust the cost COST for being in loop setup rather than loop body.
3655 If we're optimizing for space, the loop setup overhead is constant;
3656 if we're optimizing for speed, amortize it over the per-iteration cost. */
3658 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3662 else if (optimize_loop_for_speed_p (data
->current_loop
))
3663 return cost
/ avg_loop_niter (data
->current_loop
);
3668 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3669 validity for a memory reference accessing memory of mode MODE in
3670 address space AS. */
3674 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3677 #define MAX_RATIO 128
3678 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3679 static vec
<sbitmap
> valid_mult_list
;
3682 if (data_index
>= valid_mult_list
.length ())
3683 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3685 valid_mult
= valid_mult_list
[data_index
];
3688 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3689 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3690 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3694 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3695 bitmap_clear (valid_mult
);
3696 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3697 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3698 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3700 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3701 if (memory_address_addr_space_p (mode
, addr
, as
)
3702 || memory_address_addr_space_p (mode
, scaled
, as
))
3703 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3706 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3708 fprintf (dump_file
, " allowed multipliers:");
3709 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3710 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3711 fprintf (dump_file
, " %d", (int) i
);
3712 fprintf (dump_file
, "\n");
3713 fprintf (dump_file
, "\n");
3716 valid_mult_list
[data_index
] = valid_mult
;
3719 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3722 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3725 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3726 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3727 variable is omitted. Compute the cost for a memory reference that accesses
3728 a memory location of mode MEM_MODE in address space AS.
3730 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3731 size of MEM_MODE / RATIO) is available. To make this determination, we
3732 look at the size of the increment to be made, which is given in CSTEP.
3733 CSTEP may be zero if the step is unknown.
3734 STMT_AFTER_INC is true iff the statement we're looking at is after the
3735 increment of the original biv.
3737 TODO -- there must be some better way. This all is quite crude. */
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};
3748 struct address_cost_data
3750 HOST_WIDE_INT min_offset
, max_offset
;
3751 unsigned costs
[2][2][2][2];
3752 unsigned ainc_costs
[AINC_NONE
];
3757 get_address_cost (bool symbol_present
, bool var_present
,
3758 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3759 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3760 addr_space_t as
, bool speed
,
3761 bool stmt_after_inc
, bool *may_autoinc
)
3763 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3764 static vec
<address_cost_data
*> address_cost_data_list
;
3765 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3766 address_cost_data
*data
;
3767 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3768 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3769 unsigned cost
, acost
, complexity
;
3770 enum ainc_type autoinc_type
;
3771 bool offset_p
, ratio_p
, autoinc
;
3772 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3773 unsigned HOST_WIDE_INT mask
;
3776 if (data_index
>= address_cost_data_list
.length ())
3777 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3779 data
= address_cost_data_list
[data_index
];
3783 HOST_WIDE_INT rat
, off
= 0;
3784 int old_cse_not_expected
, width
;
3785 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3790 data
= (address_cost_data
*) xcalloc (1, sizeof (*data
));
3792 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3794 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3795 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3796 width
= HOST_BITS_PER_WIDE_INT
- 1;
3797 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3799 for (i
= width
; i
>= 0; i
--)
3801 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3802 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3803 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3806 data
->min_offset
= (i
== -1? 0 : off
);
3808 for (i
= width
; i
>= 0; i
--)
3810 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3811 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3812 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3814 /* For some strict-alignment targets, the offset must be naturally
3815 aligned. Try an aligned offset if mem_mode is not QImode. */
3816 off
= mem_mode
!= QImode
3817 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3818 - GET_MODE_SIZE (mem_mode
)
3822 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3823 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3829 data
->max_offset
= off
;
3831 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3833 fprintf (dump_file
, "get_address_cost:\n");
3834 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3835 GET_MODE_NAME (mem_mode
),
3837 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3838 GET_MODE_NAME (mem_mode
),
3843 for (i
= 2; i
<= MAX_RATIO
; i
++)
3844 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3850 /* Compute the cost of various addressing modes. */
3852 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3853 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3855 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3856 || USE_STORE_PRE_DECREMENT (mem_mode
))
3858 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3859 has_predec
[mem_mode
]
3860 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3862 if (has_predec
[mem_mode
])
3863 data
->ainc_costs
[AINC_PRE_DEC
]
3864 = address_cost (addr
, mem_mode
, as
, speed
);
3866 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3867 || USE_STORE_POST_DECREMENT (mem_mode
))
3869 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3870 has_postdec
[mem_mode
]
3871 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3873 if (has_postdec
[mem_mode
])
3874 data
->ainc_costs
[AINC_POST_DEC
]
3875 = address_cost (addr
, mem_mode
, as
, speed
);
3877 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3878 || USE_STORE_PRE_DECREMENT (mem_mode
))
3880 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3881 has_preinc
[mem_mode
]
3882 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3884 if (has_preinc
[mem_mode
])
3885 data
->ainc_costs
[AINC_PRE_INC
]
3886 = address_cost (addr
, mem_mode
, as
, speed
);
3888 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3889 || USE_STORE_POST_INCREMENT (mem_mode
))
3891 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3892 has_postinc
[mem_mode
]
3893 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3895 if (has_postinc
[mem_mode
])
3896 data
->ainc_costs
[AINC_POST_INC
]
3897 = address_cost (addr
, mem_mode
, as
, speed
);
3899 for (i
= 0; i
< 16; i
++)
3902 var_p
= (i
>> 1) & 1;
3903 off_p
= (i
>> 2) & 1;
3904 rat_p
= (i
>> 3) & 1;
3908 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3909 gen_int_mode (rat
, address_mode
));
3912 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3916 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3917 /* ??? We can run into trouble with some backends by presenting
3918 it with symbols which haven't been properly passed through
3919 targetm.encode_section_info. By setting the local bit, we
3920 enhance the probability of things working. */
3921 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3924 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3926 (PLUS
, address_mode
, base
,
3927 gen_int_mode (off
, address_mode
)));
3930 base
= gen_int_mode (off
, address_mode
);
3935 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3938 /* To avoid splitting addressing modes, pretend that no cse will
3940 old_cse_not_expected
= cse_not_expected
;
3941 cse_not_expected
= true;
3942 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3943 cse_not_expected
= old_cse_not_expected
;
3947 acost
= seq_cost (seq
, speed
);
3948 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3952 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3955 /* On some targets, it is quite expensive to load symbol to a register,
3956 which makes addresses that contain symbols look much more expensive.
3957 However, the symbol will have to be loaded in any case before the
3958 loop (and quite likely we have it in register already), so it does not
3959 make much sense to penalize them too heavily. So make some final
3960 tweaks for the SYMBOL_PRESENT modes:
3962 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3963 var is cheaper, use this mode with small penalty.
3964 If VAR_PRESENT is true, try whether the mode with
3965 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3966 if this is the case, use it. */
3967 add_c
= add_cost (speed
, address_mode
);
3968 for (i
= 0; i
< 8; i
++)
3971 off_p
= (i
>> 1) & 1;
3972 rat_p
= (i
>> 2) & 1;
3974 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3978 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3979 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3982 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3984 fprintf (dump_file
, "Address costs:\n");
3986 for (i
= 0; i
< 16; i
++)
3989 var_p
= (i
>> 1) & 1;
3990 off_p
= (i
>> 2) & 1;
3991 rat_p
= (i
>> 3) & 1;
3993 fprintf (dump_file
, " ");
3995 fprintf (dump_file
, "sym + ");
3997 fprintf (dump_file
, "var + ");
3999 fprintf (dump_file
, "cst + ");
4001 fprintf (dump_file
, "rat * ");
4003 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
4004 fprintf (dump_file
, "index costs %d\n", acost
);
4006 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
4007 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
4008 fprintf (dump_file
, " May include autoinc/dec\n");
4009 fprintf (dump_file
, "\n");
4012 address_cost_data_list
[data_index
] = data
;
4015 bits
= GET_MODE_BITSIZE (address_mode
);
4016 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
4018 if ((offset
>> (bits
- 1) & 1))
4023 autoinc_type
= AINC_NONE
;
4024 msize
= GET_MODE_SIZE (mem_mode
);
4025 autoinc_offset
= offset
;
4027 autoinc_offset
+= ratio
* cstep
;
4028 if (symbol_present
|| var_present
|| ratio
!= 1)
4032 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
4034 autoinc_type
= AINC_POST_INC
;
4035 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
4037 autoinc_type
= AINC_POST_DEC
;
4038 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
4040 autoinc_type
= AINC_PRE_INC
;
4041 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
4043 autoinc_type
= AINC_PRE_DEC
;
4045 if (autoinc_type
!= AINC_NONE
)
4050 offset_p
= (s_offset
!= 0
4051 && data
->min_offset
<= s_offset
4052 && s_offset
<= data
->max_offset
);
4053 ratio_p
= (ratio
!= 1
4054 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
4056 if (ratio
!= 1 && !ratio_p
)
4057 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
4059 if (s_offset
&& !offset_p
&& !symbol_present
)
4060 cost
+= add_cost (speed
, address_mode
);
4063 *may_autoinc
= autoinc
;
4065 acost
= data
->ainc_costs
[autoinc_type
];
4067 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
4068 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
4069 return new_cost (cost
+ acost
, complexity
);
4072 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4073 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4074 calculating the operands of EXPR. Returns true if successful, and returns
4075 the cost in COST. */
4078 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
4079 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4082 tree op1
= TREE_OPERAND (expr
, 1);
4083 tree cst
= TREE_OPERAND (mult
, 1);
4084 tree multop
= TREE_OPERAND (mult
, 0);
4085 int m
= exact_log2 (int_cst_value (cst
));
4086 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4087 int as_cost
, sa_cost
;
4090 if (!(m
>= 0 && m
< maxm
))
4094 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4096 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4098 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4099 use that in preference to a shift insn followed by an add insn. */
4100 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4101 ? shiftadd_cost (speed
, mode
, m
)
4103 ? shiftsub1_cost (speed
, mode
, m
)
4104 : shiftsub0_cost (speed
, mode
, m
)));
4106 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
4107 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
4109 STRIP_NOPS (multop
);
4110 if (!is_gimple_val (multop
))
4111 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
4117 /* Estimates cost of forcing expression EXPR into a variable. */
4120 force_expr_to_var_cost (tree expr
, bool speed
)
4122 static bool costs_initialized
= false;
4123 static unsigned integer_cost
[2];
4124 static unsigned symbol_cost
[2];
4125 static unsigned address_cost
[2];
4127 comp_cost cost0
, cost1
, cost
;
4130 if (!costs_initialized
)
4132 tree type
= build_pointer_type (integer_type_node
);
4137 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4138 TREE_STATIC (var
) = 1;
4139 x
= produce_memory_decl_rtl (var
, NULL
);
4140 SET_DECL_RTL (var
, x
);
4142 addr
= build1 (ADDR_EXPR
, type
, var
);
4145 for (i
= 0; i
< 2; i
++)
4147 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4150 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4153 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4154 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4156 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4157 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4158 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4159 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4160 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4161 fprintf (dump_file
, "\n");
4165 costs_initialized
= true;
4170 if (SSA_VAR_P (expr
))
4173 if (is_gimple_min_invariant (expr
))
4175 if (TREE_CODE (expr
) == INTEGER_CST
)
4176 return new_cost (integer_cost
[speed
], 0);
4178 if (TREE_CODE (expr
) == ADDR_EXPR
)
4180 tree obj
= TREE_OPERAND (expr
, 0);
4182 if (TREE_CODE (obj
) == VAR_DECL
4183 || TREE_CODE (obj
) == PARM_DECL
4184 || TREE_CODE (obj
) == RESULT_DECL
)
4185 return new_cost (symbol_cost
[speed
], 0);
4188 return new_cost (address_cost
[speed
], 0);
4191 switch (TREE_CODE (expr
))
4193 case POINTER_PLUS_EXPR
:
4197 op0
= TREE_OPERAND (expr
, 0);
4198 op1
= TREE_OPERAND (expr
, 1);
4205 op0
= TREE_OPERAND (expr
, 0);
4211 /* Just an arbitrary value, FIXME. */
4212 return new_cost (target_spill_cost
[speed
], 0);
4215 if (op0
== NULL_TREE
4216 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4219 cost0
= force_expr_to_var_cost (op0
, speed
);
4221 if (op1
== NULL_TREE
4222 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4225 cost1
= force_expr_to_var_cost (op1
, speed
);
4227 mode
= TYPE_MODE (TREE_TYPE (expr
));
4228 switch (TREE_CODE (expr
))
4230 case POINTER_PLUS_EXPR
:
4234 cost
= new_cost (add_cost (speed
, mode
), 0);
4235 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4237 tree mult
= NULL_TREE
;
4239 if (TREE_CODE (op1
) == MULT_EXPR
)
4241 else if (TREE_CODE (op0
) == MULT_EXPR
)
4244 if (mult
!= NULL_TREE
4245 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4246 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4254 tree inner_mode
, outer_mode
;
4255 outer_mode
= TREE_TYPE (expr
);
4256 inner_mode
= TREE_TYPE (op0
);
4257 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4258 TYPE_MODE (inner_mode
), speed
), 0);
4263 if (cst_and_fits_in_hwi (op0
))
4264 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4266 else if (cst_and_fits_in_hwi (op1
))
4267 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4270 return new_cost (target_spill_cost
[speed
], 0);
4277 cost
= add_costs (cost
, cost0
);
4278 cost
= add_costs (cost
, cost1
);
4280 /* Bound the cost by target_spill_cost. The parts of complicated
4281 computations often are either loop invariant or at least can
4282 be shared between several iv uses, so letting this grow without
4283 limits would not give reasonable results. */
4284 if (cost
.cost
> (int) target_spill_cost
[speed
])
4285 cost
.cost
= target_spill_cost
[speed
];
4290 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4291 invariants the computation depends on. */
4294 force_var_cost (struct ivopts_data
*data
,
4295 tree expr
, bitmap
*depends_on
)
4299 fd_ivopts_data
= data
;
4300 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4303 return force_expr_to_var_cost (expr
, data
->speed
);
4306 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4307 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4308 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4309 invariants the computation depends on. */
4312 split_address_cost (struct ivopts_data
*data
,
4313 tree addr
, bool *symbol_present
, bool *var_present
,
4314 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4317 HOST_WIDE_INT bitsize
;
4318 HOST_WIDE_INT bitpos
;
4321 int unsignedp
, reversep
, volatilep
;
4323 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4324 &unsignedp
, &reversep
, &volatilep
, false);
4327 || bitpos
% BITS_PER_UNIT
!= 0
4329 || TREE_CODE (core
) != VAR_DECL
)
4331 *symbol_present
= false;
4332 *var_present
= true;
4333 fd_ivopts_data
= data
;
4335 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4337 return new_cost (target_spill_cost
[data
->speed
], 0);
4340 *offset
+= bitpos
/ BITS_PER_UNIT
;
4341 if (TREE_STATIC (core
)
4342 || DECL_EXTERNAL (core
))
4344 *symbol_present
= true;
4345 *var_present
= false;
4349 *symbol_present
= false;
4350 *var_present
= true;
4354 /* Estimates cost of expressing difference of addresses E1 - E2 as
4355 var + symbol + offset. The value of offset is added to OFFSET,
4356 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4357 part is missing. DEPENDS_ON is a set of the invariants the computation
4361 ptr_difference_cost (struct ivopts_data
*data
,
4362 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4363 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4365 HOST_WIDE_INT diff
= 0;
4366 aff_tree aff_e1
, aff_e2
;
4369 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4371 if (ptr_difference_const (e1
, e2
, &diff
))
4374 *symbol_present
= false;
4375 *var_present
= false;
4379 if (integer_zerop (e2
))
4380 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4381 symbol_present
, var_present
, offset
, depends_on
);
4383 *symbol_present
= false;
4384 *var_present
= true;
4386 type
= signed_type_for (TREE_TYPE (e1
));
4387 tree_to_aff_combination (e1
, type
, &aff_e1
);
4388 tree_to_aff_combination (e2
, type
, &aff_e2
);
4389 aff_combination_scale (&aff_e2
, -1);
4390 aff_combination_add (&aff_e1
, &aff_e2
);
4392 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4395 /* Estimates cost of expressing difference E1 - E2 as
4396 var + symbol + offset. The value of offset is added to OFFSET,
4397 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4398 part is missing. DEPENDS_ON is a set of the invariants the computation
4402 difference_cost (struct ivopts_data
*data
,
4403 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4404 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4406 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4407 unsigned HOST_WIDE_INT off1
, off2
;
4408 aff_tree aff_e1
, aff_e2
;
4411 e1
= strip_offset (e1
, &off1
);
4412 e2
= strip_offset (e2
, &off2
);
4413 *offset
+= off1
- off2
;
4418 if (TREE_CODE (e1
) == ADDR_EXPR
)
4419 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4420 offset
, depends_on
);
4421 *symbol_present
= false;
4423 if (operand_equal_p (e1
, e2
, 0))
4425 *var_present
= false;
4429 *var_present
= true;
4431 if (integer_zerop (e2
))
4432 return force_var_cost (data
, e1
, depends_on
);
4434 if (integer_zerop (e1
))
4436 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4437 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4441 type
= signed_type_for (TREE_TYPE (e1
));
4442 tree_to_aff_combination (e1
, type
, &aff_e1
);
4443 tree_to_aff_combination (e2
, type
, &aff_e2
);
4444 aff_combination_scale (&aff_e2
, -1);
4445 aff_combination_add (&aff_e1
, &aff_e2
);
4447 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4450 /* Returns true if AFF1 and AFF2 are identical. */
4453 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4457 if (aff1
->n
!= aff2
->n
)
4460 for (i
= 0; i
< aff1
->n
; i
++)
4462 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4465 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4471 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4474 get_expr_id (struct ivopts_data
*data
, tree expr
)
4476 struct iv_inv_expr_ent ent
;
4477 struct iv_inv_expr_ent
**slot
;
4480 ent
.hash
= iterative_hash_expr (expr
, 0);
4481 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4485 *slot
= XNEW (struct iv_inv_expr_ent
);
4486 (*slot
)->expr
= expr
;
4487 (*slot
)->hash
= ent
.hash
;
4488 (*slot
)->id
= data
->inv_expr_id
++;
4492 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4493 requires a new compiler generated temporary. Returns -1 otherwise.
4494 ADDRESS_P is a flag indicating if the expression is for address
4498 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4499 tree cbase
, HOST_WIDE_INT ratio
,
4502 aff_tree ubase_aff
, cbase_aff
;
4510 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4511 && (TREE_CODE (cbase
) == INTEGER_CST
))
4514 /* Strips the constant part. */
4515 if (TREE_CODE (ubase
) == PLUS_EXPR
4516 || TREE_CODE (ubase
) == MINUS_EXPR
4517 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4519 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4520 ubase
= TREE_OPERAND (ubase
, 0);
4523 /* Strips the constant part. */
4524 if (TREE_CODE (cbase
) == PLUS_EXPR
4525 || TREE_CODE (cbase
) == MINUS_EXPR
4526 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4528 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4529 cbase
= TREE_OPERAND (cbase
, 0);
4534 if (((TREE_CODE (ubase
) == SSA_NAME
)
4535 || (TREE_CODE (ubase
) == ADDR_EXPR
4536 && is_gimple_min_invariant (ubase
)))
4537 && (TREE_CODE (cbase
) == INTEGER_CST
))
4540 if (((TREE_CODE (cbase
) == SSA_NAME
)
4541 || (TREE_CODE (cbase
) == ADDR_EXPR
4542 && is_gimple_min_invariant (cbase
)))
4543 && (TREE_CODE (ubase
) == INTEGER_CST
))
4549 if (operand_equal_p (ubase
, cbase
, 0))
4552 if (TREE_CODE (ubase
) == ADDR_EXPR
4553 && TREE_CODE (cbase
) == ADDR_EXPR
)
4557 usym
= TREE_OPERAND (ubase
, 0);
4558 csym
= TREE_OPERAND (cbase
, 0);
4559 if (TREE_CODE (usym
) == ARRAY_REF
)
4561 tree ind
= TREE_OPERAND (usym
, 1);
4562 if (TREE_CODE (ind
) == INTEGER_CST
4563 && tree_fits_shwi_p (ind
)
4564 && tree_to_shwi (ind
) == 0)
4565 usym
= TREE_OPERAND (usym
, 0);
4567 if (TREE_CODE (csym
) == ARRAY_REF
)
4569 tree ind
= TREE_OPERAND (csym
, 1);
4570 if (TREE_CODE (ind
) == INTEGER_CST
4571 && tree_fits_shwi_p (ind
)
4572 && tree_to_shwi (ind
) == 0)
4573 csym
= TREE_OPERAND (csym
, 0);
4575 if (operand_equal_p (usym
, csym
, 0))
4578 /* Now do more complex comparison */
4579 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4580 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4581 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4585 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4586 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4588 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4589 aff_combination_add (&ubase_aff
, &cbase_aff
);
4590 expr
= aff_combination_to_tree (&ubase_aff
);
4591 return get_expr_id (data
, expr
);
4596 /* Determines the cost of the computation by that USE is expressed
4597 from induction variable CAND. If ADDRESS_P is true, we just need
4598 to create an address from it, otherwise we want to get it into
4599 register. A set of invariants we depend on is stored in
4600 DEPENDS_ON. AT is the statement at that the value is computed.
4601 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4602 addressing is likely. */
4605 get_computation_cost_at (struct ivopts_data
*data
,
4606 struct iv_use
*use
, struct iv_cand
*cand
,
4607 bool address_p
, bitmap
*depends_on
, gimple
*at
,
4611 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4613 tree utype
= TREE_TYPE (ubase
), ctype
;
4614 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4615 HOST_WIDE_INT ratio
, aratio
;
4616 bool var_present
, symbol_present
, stmt_is_after_inc
;
4619 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4620 machine_mode mem_mode
= (address_p
4621 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4627 /* Only consider real candidates. */
4629 return infinite_cost
;
4631 cbase
= cand
->iv
->base
;
4632 cstep
= cand
->iv
->step
;
4633 ctype
= TREE_TYPE (cbase
);
4635 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4637 /* We do not have a precision to express the values of use. */
4638 return infinite_cost
;
4642 || (use
->iv
->base_object
4643 && cand
->iv
->base_object
4644 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4645 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4647 /* Do not try to express address of an object with computation based
4648 on address of a different object. This may cause problems in rtl
4649 level alias analysis (that does not expect this to be happening,
4650 as this is illegal in C), and would be unlikely to be useful
4652 if (use
->iv
->base_object
4653 && cand
->iv
->base_object
4654 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4655 return infinite_cost
;
4658 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4660 /* TODO -- add direct handling of this case. */
4664 /* CSTEPI is removed from the offset in case statement is after the
4665 increment. If the step is not constant, we use zero instead.
4666 This is a bit imprecise (there is the extra addition), but
4667 redundancy elimination is likely to transform the code so that
4668 it uses value of the variable before increment anyway,
4669 so it is not that much unrealistic. */
4670 if (cst_and_fits_in_hwi (cstep
))
4671 cstepi
= int_cst_value (cstep
);
4675 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4676 return infinite_cost
;
4678 if (wi::fits_shwi_p (rat
))
4679 ratio
= rat
.to_shwi ();
4681 return infinite_cost
;
4684 ctype
= TREE_TYPE (cbase
);
4686 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4688 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4689 or ratio == 1, it is better to handle this like
4691 ubase - ratio * cbase + ratio * var
4693 (also holds in the case ratio == -1, TODO. */
4695 if (cst_and_fits_in_hwi (cbase
))
4697 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4698 cost
= difference_cost (data
,
4699 ubase
, build_int_cst (utype
, 0),
4700 &symbol_present
, &var_present
, &offset
,
4702 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4704 else if (ratio
== 1)
4706 tree real_cbase
= cbase
;
4708 /* Check to see if any adjustment is needed. */
4709 if (cstepi
== 0 && stmt_is_after_inc
)
4711 aff_tree real_cbase_aff
;
4714 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4716 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4718 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4719 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4722 cost
= difference_cost (data
,
4724 &symbol_present
, &var_present
, &offset
,
4726 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4729 && !POINTER_TYPE_P (ctype
)
4730 && multiplier_allowed_in_address_p
4732 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4734 if (cstepi
== 0 && stmt_is_after_inc
)
4736 if (POINTER_TYPE_P (ctype
))
4737 cbase
= fold_build2 (POINTER_PLUS_EXPR
, ctype
, cbase
, cstep
);
4739 cbase
= fold_build2 (PLUS_EXPR
, ctype
, cbase
, cstep
);
4742 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4743 cost
= difference_cost (data
,
4745 &symbol_present
, &var_present
, &offset
,
4747 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4751 cost
= force_var_cost (data
, cbase
, depends_on
);
4752 cost
= add_costs (cost
,
4753 difference_cost (data
,
4754 ubase
, build_int_cst (utype
, 0),
4755 &symbol_present
, &var_present
,
4756 &offset
, depends_on
));
4757 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4758 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4761 /* Set of invariants depended on by sub use has already been computed
4762 for the first use in the group. */
4766 if (depends_on
&& *depends_on
)
4767 bitmap_clear (*depends_on
);
4769 else if (inv_expr_id
)
4772 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4773 /* Clear depends on. */
4774 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4775 bitmap_clear (*depends_on
);
4778 /* If we are after the increment, the value of the candidate is higher by
4780 if (stmt_is_after_inc
)
4781 offset
-= ratio
* cstepi
;
4783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4784 (symbol/var1/const parts may be omitted). If we are looking for an
4785 address, find the cost of addressing this. */
4787 return add_costs (cost
,
4788 get_address_cost (symbol_present
, var_present
,
4789 offset
, ratio
, cstepi
,
4791 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4792 speed
, stmt_is_after_inc
,
4795 /* Otherwise estimate the costs for computing the expression. */
4796 if (!symbol_present
&& !var_present
&& !offset
)
4799 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4803 /* Symbol + offset should be compile-time computable so consider that they
4804 are added once to the variable, if present. */
4805 if (var_present
&& (symbol_present
|| offset
))
4806 cost
.cost
+= adjust_setup_cost (data
,
4807 add_cost (speed
, TYPE_MODE (ctype
)));
4809 /* Having offset does not affect runtime cost in case it is added to
4810 symbol, but it increases complexity. */
4814 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4816 aratio
= ratio
> 0 ? ratio
: -ratio
;
4818 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4823 *can_autoinc
= false;
4826 /* Just get the expression, expand it and measure the cost. */
4827 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4830 return infinite_cost
;
4833 comp
= build_simple_mem_ref (comp
);
4835 return new_cost (computation_cost (comp
, speed
), 0);
4839 /* Determines the cost of the computation by that USE is expressed
4840 from induction variable CAND. If ADDRESS_P is true, we just need
4841 to create an address from it, otherwise we want to get it into
4842 register. A set of invariants we depend on is stored in
4843 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4844 autoinc addressing is likely. */
4847 get_computation_cost (struct ivopts_data
*data
,
4848 struct iv_use
*use
, struct iv_cand
*cand
,
4849 bool address_p
, bitmap
*depends_on
,
4850 bool *can_autoinc
, int *inv_expr_id
)
4852 return get_computation_cost_at (data
,
4853 use
, cand
, address_p
, depends_on
, use
->stmt
,
4854 can_autoinc
, inv_expr_id
);
4857 /* Determines cost of basing replacement of USE on CAND in a generic
4861 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4862 struct iv_use
*use
, struct iv_cand
*cand
)
4866 int inv_expr_id
= -1;
4868 /* The simple case first -- if we need to express value of the preserved
4869 original biv, the cost is 0. This also prevents us from counting the
4870 cost of increment twice -- once at this use and once in the cost of
4872 if (cand
->pos
== IP_ORIGINAL
4873 && cand
->incremented_at
== use
->stmt
)
4875 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4880 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4881 NULL
, &inv_expr_id
);
4883 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4886 return !infinite_cost_p (cost
);
4889 /* Determines cost of basing replacement of USE on CAND in an address. */
4892 determine_use_iv_cost_address (struct ivopts_data
*data
,
4893 struct iv_use
*use
, struct iv_cand
*cand
)
4897 int inv_expr_id
= -1;
4898 struct iv_use
*sub_use
;
4900 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4901 &can_autoinc
, &inv_expr_id
);
4903 if (cand
->ainc_use
== use
)
4906 cost
.cost
-= cand
->cost_step
;
4907 /* If we generated the candidate solely for exploiting autoincrement
4908 opportunities, and it turns out it can't be used, set the cost to
4909 infinity to make sure we ignore it. */
4910 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4911 cost
= infinite_cost
;
4913 for (sub_use
= use
->next
;
4914 sub_use
&& !infinite_cost_p (cost
);
4915 sub_use
= sub_use
->next
)
4917 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, NULL
,
4918 &can_autoinc
, NULL
);
4919 cost
= add_costs (cost
, sub_cost
);
4922 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4925 return !infinite_cost_p (cost
);
4928 /* Computes value of candidate CAND at position AT in iteration NITER, and
4929 stores it to VAL. */
4932 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
4935 aff_tree step
, delta
, nit
;
4936 struct iv
*iv
= cand
->iv
;
4937 tree type
= TREE_TYPE (iv
->base
);
4938 tree steptype
= type
;
4939 if (POINTER_TYPE_P (type
))
4940 steptype
= sizetype
;
4941 steptype
= unsigned_type_for (type
);
4943 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4944 aff_combination_convert (&step
, steptype
);
4945 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4946 aff_combination_convert (&nit
, steptype
);
4947 aff_combination_mult (&nit
, &step
, &delta
);
4948 if (stmt_after_increment (loop
, cand
, at
))
4949 aff_combination_add (&delta
, &step
);
4951 tree_to_aff_combination (iv
->base
, type
, val
);
4952 if (!POINTER_TYPE_P (type
))
4953 aff_combination_convert (val
, steptype
);
4954 aff_combination_add (val
, &delta
);
4957 /* Returns period of induction variable iv. */
4960 iv_period (struct iv
*iv
)
4962 tree step
= iv
->step
, period
, type
;
4965 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4967 type
= unsigned_type_for (TREE_TYPE (step
));
4968 /* Period of the iv is lcm (step, type_range)/step -1,
4969 i.e., N*type_range/step - 1. Since type range is power
4970 of two, N == (step >> num_of_ending_zeros_binary (step),
4971 so the final result is
4973 (type_range >> num_of_ending_zeros_binary (step)) - 1
4976 pow2div
= num_ending_zeros (step
);
4978 period
= build_low_bits_mask (type
,
4979 (TYPE_PRECISION (type
)
4980 - tree_to_uhwi (pow2div
)));
4985 /* Returns the comparison operator used when eliminating the iv USE. */
4987 static enum tree_code
4988 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4990 struct loop
*loop
= data
->current_loop
;
4994 ex_bb
= gimple_bb (use
->stmt
);
4995 exit
= EDGE_SUCC (ex_bb
, 0);
4996 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4997 exit
= EDGE_SUCC (ex_bb
, 1);
4999 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
5002 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5003 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5004 calculation is performed in non-wrapping type.
5006 TODO: More generally, we could test for the situation that
5007 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5008 This would require knowing the sign of OFFSET. */
5011 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
5013 enum tree_code code
;
5015 aff_tree aff_e1
, aff_e2
, aff_offset
;
5017 if (!nowrap_type_p (TREE_TYPE (base
)))
5020 base
= expand_simple_operations (base
);
5022 if (TREE_CODE (base
) == SSA_NAME
)
5024 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
5026 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
5029 code
= gimple_assign_rhs_code (stmt
);
5030 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5033 e1
= gimple_assign_rhs1 (stmt
);
5034 e2
= gimple_assign_rhs2 (stmt
);
5038 code
= TREE_CODE (base
);
5039 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5041 e1
= TREE_OPERAND (base
, 0);
5042 e2
= TREE_OPERAND (base
, 1);
5045 /* Use affine expansion as deeper inspection to prove the equality. */
5046 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
5047 &aff_e2
, &data
->name_expansion_cache
);
5048 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
5049 &aff_offset
, &data
->name_expansion_cache
);
5050 aff_combination_scale (&aff_offset
, -1);
5054 aff_combination_add (&aff_e2
, &aff_offset
);
5055 if (aff_combination_zero_p (&aff_e2
))
5058 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
5059 &aff_e1
, &data
->name_expansion_cache
);
5060 aff_combination_add (&aff_e1
, &aff_offset
);
5061 return aff_combination_zero_p (&aff_e1
);
5063 case POINTER_PLUS_EXPR
:
5064 aff_combination_add (&aff_e2
, &aff_offset
);
5065 return aff_combination_zero_p (&aff_e2
);
5072 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5073 comparison with CAND. NITER describes the number of iterations of
5074 the loops. If successful, the comparison in COMP_P is altered accordingly.
5076 We aim to handle the following situation:
5092 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5093 We aim to optimize this to
5101 while (p < p_0 - a + b);
5103 This preserves the correctness, since the pointer arithmetics does not
5104 overflow. More precisely:
5106 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5107 overflow in computing it or the values of p.
5108 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5109 overflow. To prove this, we use the fact that p_0 = base + a. */
5112 iv_elimination_compare_lt (struct ivopts_data
*data
,
5113 struct iv_cand
*cand
, enum tree_code
*comp_p
,
5114 struct tree_niter_desc
*niter
)
5116 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
5117 struct aff_tree nit
, tmpa
, tmpb
;
5118 enum tree_code comp
;
5121 /* We need to know that the candidate induction variable does not overflow.
5122 While more complex analysis may be used to prove this, for now just
5123 check that the variable appears in the original program and that it
5124 is computed in a type that guarantees no overflows. */
5125 cand_type
= TREE_TYPE (cand
->iv
->base
);
5126 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
5129 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5130 the calculation of the BOUND could overflow, making the comparison
5132 if (!data
->loop_single_exit_p
)
5135 /* We need to be able to decide whether candidate is increasing or decreasing
5136 in order to choose the right comparison operator. */
5137 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5139 step
= int_cst_value (cand
->iv
->step
);
5141 /* Check that the number of iterations matches the expected pattern:
5142 a + 1 > b ? 0 : b - a - 1. */
5143 mbz
= niter
->may_be_zero
;
5144 if (TREE_CODE (mbz
) == GT_EXPR
)
5146 /* Handle a + 1 > b. */
5147 tree op0
= TREE_OPERAND (mbz
, 0);
5148 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5150 a
= TREE_OPERAND (op0
, 0);
5151 b
= TREE_OPERAND (mbz
, 1);
5156 else if (TREE_CODE (mbz
) == LT_EXPR
)
5158 tree op1
= TREE_OPERAND (mbz
, 1);
5160 /* Handle b < a + 1. */
5161 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5163 a
= TREE_OPERAND (op1
, 0);
5164 b
= TREE_OPERAND (mbz
, 0);
5172 /* Expected number of iterations is B - A - 1. Check that it matches
5173 the actual number, i.e., that B - A - NITER = 1. */
5174 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5175 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5176 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5177 aff_combination_scale (&nit
, -1);
5178 aff_combination_scale (&tmpa
, -1);
5179 aff_combination_add (&tmpb
, &tmpa
);
5180 aff_combination_add (&tmpb
, &nit
);
5181 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
5184 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5186 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5188 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5189 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5192 /* Determine the new comparison operator. */
5193 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5194 if (*comp_p
== NE_EXPR
)
5196 else if (*comp_p
== EQ_EXPR
)
5197 *comp_p
= invert_tree_comparison (comp
, false);
5204 /* Check whether it is possible to express the condition in USE by comparison
5205 of candidate CAND. If so, store the value compared with to BOUND, and the
5206 comparison operator to COMP. */
5209 may_eliminate_iv (struct ivopts_data
*data
,
5210 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5211 enum tree_code
*comp
)
5216 struct loop
*loop
= data
->current_loop
;
5218 struct tree_niter_desc
*desc
= NULL
;
5220 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5223 /* For now works only for exits that dominate the loop latch.
5224 TODO: extend to other conditions inside loop body. */
5225 ex_bb
= gimple_bb (use
->stmt
);
5226 if (use
->stmt
!= last_stmt (ex_bb
)
5227 || gimple_code (use
->stmt
) != GIMPLE_COND
5228 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5231 exit
= EDGE_SUCC (ex_bb
, 0);
5232 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5233 exit
= EDGE_SUCC (ex_bb
, 1);
5234 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5237 desc
= niter_for_exit (data
, exit
);
5241 /* Determine whether we can use the variable to test the exit condition.
5242 This is the case iff the period of the induction variable is greater
5243 than the number of iterations for which the exit condition is true. */
5244 period
= iv_period (cand
->iv
);
5246 /* If the number of iterations is constant, compare against it directly. */
5247 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5249 /* See cand_value_at. */
5250 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5252 if (!tree_int_cst_lt (desc
->niter
, period
))
5257 if (tree_int_cst_lt (period
, desc
->niter
))
5262 /* If not, and if this is the only possible exit of the loop, see whether
5263 we can get a conservative estimate on the number of iterations of the
5264 entire loop and compare against that instead. */
5267 widest_int period_value
, max_niter
;
5269 max_niter
= desc
->max
;
5270 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5272 period_value
= wi::to_widest (period
);
5273 if (wi::gtu_p (max_niter
, period_value
))
5275 /* See if we can take advantage of inferred loop bound information. */
5276 if (data
->loop_single_exit_p
)
5278 if (!max_loop_iterations (loop
, &max_niter
))
5280 /* The loop bound is already adjusted by adding 1. */
5281 if (wi::gtu_p (max_niter
, period_value
))
5289 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5291 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5292 aff_combination_to_tree (&bnd
));
5293 *comp
= iv_elimination_compare (data
, use
);
5295 /* It is unlikely that computing the number of iterations using division
5296 would be more profitable than keeping the original induction variable. */
5297 if (expression_expensive_p (*bound
))
5300 /* Sometimes, it is possible to handle the situation that the number of
5301 iterations may be zero unless additional assumtions by using <
5302 instead of != in the exit condition.
5304 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5305 base the exit condition on it. However, that is often too
5307 if (!integer_zerop (desc
->may_be_zero
))
5308 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5313 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5314 be copied, if it is used in the loop body and DATA->body_includes_call. */
5317 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5319 tree sbound
= bound
;
5320 STRIP_NOPS (sbound
);
5322 if (TREE_CODE (sbound
) == SSA_NAME
5323 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5324 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5325 && data
->body_includes_call
)
5326 return COSTS_N_INSNS (1);
5331 /* Determines cost of basing replacement of USE on CAND in a condition. */
5334 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5335 struct iv_use
*use
, struct iv_cand
*cand
)
5337 tree bound
= NULL_TREE
;
5339 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5340 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5342 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5343 tree
*control_var
, *bound_cst
;
5344 enum tree_code comp
= ERROR_MARK
;
5346 /* Only consider real candidates. */
5349 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5354 /* Try iv elimination. */
5355 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5357 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5358 if (elim_cost
.cost
== 0)
5359 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5360 else if (TREE_CODE (bound
) == INTEGER_CST
)
5362 /* If we replace a loop condition 'i < n' with 'p < base + n',
5363 depends_on_elim will have 'base' and 'n' set, which implies
5364 that both 'base' and 'n' will be live during the loop. More likely,
5365 'base + n' will be loop invariant, resulting in only one live value
5366 during the loop. So in that case we clear depends_on_elim and set
5367 elim_inv_expr_id instead. */
5368 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5370 elim_inv_expr_id
= get_expr_id (data
, bound
);
5371 bitmap_clear (depends_on_elim
);
5373 /* The bound is a loop invariant, so it will be only computed
5375 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5378 elim_cost
= infinite_cost
;
5380 /* Try expressing the original giv. If it is compared with an invariant,
5381 note that we cannot get rid of it. */
5382 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5386 /* When the condition is a comparison of the candidate IV against
5387 zero, prefer this IV.
5389 TODO: The constant that we're subtracting from the cost should
5390 be target-dependent. This information should be added to the
5391 target costs for each backend. */
5392 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5393 && integer_zerop (*bound_cst
)
5394 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5395 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5396 elim_cost
.cost
-= 1;
5398 express_cost
= get_computation_cost (data
, use
, cand
, false,
5399 &depends_on_express
, NULL
,
5400 &express_inv_expr_id
);
5401 fd_ivopts_data
= data
;
5402 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5404 /* Count the cost of the original bound as well. */
5405 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5406 if (bound_cost
.cost
== 0)
5407 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5408 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5409 bound_cost
.cost
= 0;
5410 express_cost
.cost
+= bound_cost
.cost
;
5412 /* Choose the better approach, preferring the eliminated IV. */
5413 if (compare_costs (elim_cost
, express_cost
) <= 0)
5416 depends_on
= depends_on_elim
;
5417 depends_on_elim
= NULL
;
5418 inv_expr_id
= elim_inv_expr_id
;
5422 cost
= express_cost
;
5423 depends_on
= depends_on_express
;
5424 depends_on_express
= NULL
;
5427 inv_expr_id
= express_inv_expr_id
;
5430 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5432 if (depends_on_elim
)
5433 BITMAP_FREE (depends_on_elim
);
5434 if (depends_on_express
)
5435 BITMAP_FREE (depends_on_express
);
5437 return !infinite_cost_p (cost
);
5440 /* Determines cost of basing replacement of USE on CAND. Returns false
5441 if USE cannot be based on CAND. */
5444 determine_use_iv_cost (struct ivopts_data
*data
,
5445 struct iv_use
*use
, struct iv_cand
*cand
)
5449 case USE_NONLINEAR_EXPR
:
5450 return determine_use_iv_cost_generic (data
, use
, cand
);
5453 return determine_use_iv_cost_address (data
, use
, cand
);
5456 return determine_use_iv_cost_condition (data
, use
, cand
);
5463 /* Return true if get_computation_cost indicates that autoincrement is
5464 a possibility for the pair of USE and CAND, false otherwise. */
5467 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5468 struct iv_cand
*cand
)
5474 if (use
->type
!= USE_ADDRESS
)
5477 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5478 &can_autoinc
, NULL
);
5480 BITMAP_FREE (depends_on
);
5482 return !infinite_cost_p (cost
) && can_autoinc
;
5485 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5486 use that allows autoincrement, and set their AINC_USE if possible. */
5489 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5493 for (i
= 0; i
< n_iv_cands (data
); i
++)
5495 struct iv_cand
*cand
= iv_cand (data
, i
);
5496 struct iv_use
*closest_before
= NULL
;
5497 struct iv_use
*closest_after
= NULL
;
5498 if (cand
->pos
!= IP_ORIGINAL
)
5501 for (j
= 0; j
< n_iv_uses (data
); j
++)
5503 struct iv_use
*use
= iv_use (data
, j
);
5504 unsigned uid
= gimple_uid (use
->stmt
);
5506 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5509 if (uid
< gimple_uid (cand
->incremented_at
)
5510 && (closest_before
== NULL
5511 || uid
> gimple_uid (closest_before
->stmt
)))
5512 closest_before
= use
;
5514 if (uid
> gimple_uid (cand
->incremented_at
)
5515 && (closest_after
== NULL
5516 || uid
< gimple_uid (closest_after
->stmt
)))
5517 closest_after
= use
;
5520 if (closest_before
!= NULL
5521 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5522 cand
->ainc_use
= closest_before
;
5523 else if (closest_after
!= NULL
5524 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5525 cand
->ainc_use
= closest_after
;
5529 /* Finds the candidates for the induction variables. */
5532 find_iv_candidates (struct ivopts_data
*data
)
5534 /* Add commonly used ivs. */
5535 add_standard_iv_candidates (data
);
5537 /* Add old induction variables. */
5538 add_iv_candidate_for_bivs (data
);
5540 /* Add induction variables derived from uses. */
5541 add_iv_candidate_for_uses (data
);
5543 set_autoinc_for_original_candidates (data
);
5545 /* Record the important candidates. */
5546 record_important_candidates (data
);
5549 /* Determines costs of basing the use of the iv on an iv candidate. */
5552 determine_use_iv_costs (struct ivopts_data
*data
)
5556 struct iv_cand
*cand
;
5557 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5559 alloc_use_cost_map (data
);
5561 for (i
= 0; i
< n_iv_uses (data
); i
++)
5563 use
= iv_use (data
, i
);
5565 if (data
->consider_all_candidates
)
5567 for (j
= 0; j
< n_iv_cands (data
); j
++)
5569 cand
= iv_cand (data
, j
);
5570 determine_use_iv_cost (data
, use
, cand
);
5577 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5579 cand
= iv_cand (data
, j
);
5580 if (!determine_use_iv_cost (data
, use
, cand
))
5581 bitmap_set_bit (to_clear
, j
);
5584 /* Remove the candidates for that the cost is infinite from
5585 the list of related candidates. */
5586 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5587 bitmap_clear (to_clear
);
5591 BITMAP_FREE (to_clear
);
5593 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5595 fprintf (dump_file
, "Use-candidate costs:\n");
5597 for (i
= 0; i
< n_iv_uses (data
); i
++)
5599 use
= iv_use (data
, i
);
5601 fprintf (dump_file
, "Use %d:\n", i
);
5602 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5603 for (j
= 0; j
< use
->n_map_members
; j
++)
5605 if (!use
->cost_map
[j
].cand
5606 || infinite_cost_p (use
->cost_map
[j
].cost
))
5609 fprintf (dump_file
, " %d\t%d\t%d\t",
5610 use
->cost_map
[j
].cand
->id
,
5611 use
->cost_map
[j
].cost
.cost
,
5612 use
->cost_map
[j
].cost
.complexity
);
5613 if (use
->cost_map
[j
].depends_on
)
5614 bitmap_print (dump_file
,
5615 use
->cost_map
[j
].depends_on
, "","");
5616 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5617 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5618 fprintf (dump_file
, "\n");
5621 fprintf (dump_file
, "\n");
5623 fprintf (dump_file
, "\n");
5627 /* Determines cost of the candidate CAND. */
5630 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5632 comp_cost cost_base
;
5633 unsigned cost
, cost_step
;
5642 /* There are two costs associated with the candidate -- its increment
5643 and its initialization. The second is almost negligible for any loop
5644 that rolls enough, so we take it just very little into account. */
5646 base
= cand
->iv
->base
;
5647 cost_base
= force_var_cost (data
, base
, NULL
);
5648 /* It will be exceptional that the iv register happens to be initialized with
5649 the proper value at no cost. In general, there will at least be a regcopy
5651 if (cost_base
.cost
== 0)
5652 cost_base
.cost
= COSTS_N_INSNS (1);
5653 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5655 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5657 /* Prefer the original ivs unless we may gain something by replacing it.
5658 The reason is to make debugging simpler; so this is not relevant for
5659 artificial ivs created by other optimization passes. */
5660 if (cand
->pos
!= IP_ORIGINAL
5661 || !SSA_NAME_VAR (cand
->var_before
)
5662 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5665 /* Prefer not to insert statements into latch unless there are some
5666 already (so that we do not create unnecessary jumps). */
5667 if (cand
->pos
== IP_END
5668 && empty_block_p (ip_end_pos (data
->current_loop
)))
5672 cand
->cost_step
= cost_step
;
5675 /* Determines costs of computation of the candidates. */
5678 determine_iv_costs (struct ivopts_data
*data
)
5682 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5684 fprintf (dump_file
, "Candidate costs:\n");
5685 fprintf (dump_file
, " cand\tcost\n");
5688 for (i
= 0; i
< n_iv_cands (data
); i
++)
5690 struct iv_cand
*cand
= iv_cand (data
, i
);
5692 determine_iv_cost (data
, cand
);
5694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5695 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5698 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5699 fprintf (dump_file
, "\n");
5702 /* Calculates cost for having SIZE induction variables. */
5705 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5707 /* We add size to the cost, so that we prefer eliminating ivs
5709 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5710 data
->body_includes_call
);
5713 /* For each size of the induction variable set determine the penalty. */
5716 determine_set_costs (struct ivopts_data
*data
)
5722 struct loop
*loop
= data
->current_loop
;
5725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5727 fprintf (dump_file
, "Global costs:\n");
5728 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5729 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5730 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5731 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5735 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5738 op
= PHI_RESULT (phi
);
5740 if (virtual_operand_p (op
))
5743 if (get_iv (data
, op
))
5749 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5751 struct version_info
*info
= ver_info (data
, j
);
5753 if (info
->inv_id
&& info
->has_nonlin_use
)
5757 data
->regs_used
= n
;
5758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5759 fprintf (dump_file
, " regs_used %d\n", n
);
5761 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5763 fprintf (dump_file
, " cost for size:\n");
5764 fprintf (dump_file
, " ivs\tcost\n");
5765 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5766 fprintf (dump_file
, " %d\t%d\n", j
,
5767 ivopts_global_cost_for_size (data
, j
));
5768 fprintf (dump_file
, "\n");
5772 /* Returns true if A is a cheaper cost pair than B. */
5775 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5785 cmp
= compare_costs (a
->cost
, b
->cost
);
5792 /* In case the costs are the same, prefer the cheaper candidate. */
5793 if (a
->cand
->cost
< b
->cand
->cost
)
5800 /* Returns candidate by that USE is expressed in IVS. */
5802 static struct cost_pair
*
5803 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5805 return ivs
->cand_for_use
[use
->id
];
5808 /* Computes the cost field of IVS structure. */
5811 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5813 comp_cost cost
= ivs
->cand_use_cost
;
5815 cost
.cost
+= ivs
->cand_cost
;
5817 cost
.cost
+= ivopts_global_cost_for_size (data
,
5818 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5823 /* Remove invariants in set INVS to set IVS. */
5826 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5834 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5836 ivs
->n_invariant_uses
[iid
]--;
5837 if (ivs
->n_invariant_uses
[iid
] == 0)
5842 /* Set USE not to be expressed by any candidate in IVS. */
5845 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5848 unsigned uid
= use
->id
, cid
;
5849 struct cost_pair
*cp
;
5851 cp
= ivs
->cand_for_use
[uid
];
5857 ivs
->cand_for_use
[uid
] = NULL
;
5858 ivs
->n_cand_uses
[cid
]--;
5860 if (ivs
->n_cand_uses
[cid
] == 0)
5862 bitmap_clear_bit (ivs
->cands
, cid
);
5863 /* Do not count the pseudocandidates. */
5867 ivs
->cand_cost
-= cp
->cand
->cost
;
5869 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5872 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5874 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5876 if (cp
->inv_expr_id
!= -1)
5878 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5879 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5880 ivs
->num_used_inv_expr
--;
5882 iv_ca_recount_cost (data
, ivs
);
5885 /* Add invariants in set INVS to set IVS. */
5888 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5896 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5898 ivs
->n_invariant_uses
[iid
]++;
5899 if (ivs
->n_invariant_uses
[iid
] == 1)
5904 /* Set cost pair for USE in set IVS to CP. */
5907 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5908 struct iv_use
*use
, struct cost_pair
*cp
)
5910 unsigned uid
= use
->id
, cid
;
5912 if (ivs
->cand_for_use
[uid
] == cp
)
5915 if (ivs
->cand_for_use
[uid
])
5916 iv_ca_set_no_cp (data
, ivs
, use
);
5923 ivs
->cand_for_use
[uid
] = cp
;
5924 ivs
->n_cand_uses
[cid
]++;
5925 if (ivs
->n_cand_uses
[cid
] == 1)
5927 bitmap_set_bit (ivs
->cands
, cid
);
5928 /* Do not count the pseudocandidates. */
5932 ivs
->cand_cost
+= cp
->cand
->cost
;
5934 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5937 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5938 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5940 if (cp
->inv_expr_id
!= -1)
5942 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5943 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5944 ivs
->num_used_inv_expr
++;
5946 iv_ca_recount_cost (data
, ivs
);
5950 /* Extend set IVS by expressing USE by some of the candidates in it
5951 if possible. Consider all important candidates if candidates in
5952 set IVS don't give any result. */
5955 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5958 struct cost_pair
*best_cp
= NULL
, *cp
;
5961 struct iv_cand
*cand
;
5963 gcc_assert (ivs
->upto
>= use
->id
);
5967 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5969 cand
= iv_cand (data
, i
);
5970 cp
= get_use_iv_cost (data
, use
, cand
);
5971 if (cheaper_cost_pair (cp
, best_cp
))
5975 if (best_cp
== NULL
)
5977 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5979 cand
= iv_cand (data
, i
);
5980 cp
= get_use_iv_cost (data
, use
, cand
);
5981 if (cheaper_cost_pair (cp
, best_cp
))
5986 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5989 /* Get cost for assignment IVS. */
5992 iv_ca_cost (struct iv_ca
*ivs
)
5994 /* This was a conditional expression but it triggered a bug in
5997 return infinite_cost
;
6002 /* Returns true if all dependences of CP are among invariants in IVS. */
6005 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
6010 if (!cp
->depends_on
)
6013 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
6015 if (ivs
->n_invariant_uses
[i
] == 0)
6022 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6023 it before NEXT_CHANGE. */
6025 static struct iv_ca_delta
*
6026 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
6027 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
6029 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
6032 change
->old_cp
= old_cp
;
6033 change
->new_cp
= new_cp
;
6034 change
->next_change
= next_change
;
6039 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6042 static struct iv_ca_delta
*
6043 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6045 struct iv_ca_delta
*last
;
6053 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
6055 last
->next_change
= l2
;
6060 /* Reverse the list of changes DELTA, forming the inverse to it. */
6062 static struct iv_ca_delta
*
6063 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6065 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6067 for (act
= delta
; act
; act
= next
)
6069 next
= act
->next_change
;
6070 act
->next_change
= prev
;
6073 std::swap (act
->old_cp
, act
->new_cp
);
6079 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6080 reverted instead. */
6083 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6084 struct iv_ca_delta
*delta
, bool forward
)
6086 struct cost_pair
*from
, *to
;
6087 struct iv_ca_delta
*act
;
6090 delta
= iv_ca_delta_reverse (delta
);
6092 for (act
= delta
; act
; act
= act
->next_change
)
6096 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
6097 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
6101 iv_ca_delta_reverse (delta
);
6104 /* Returns true if CAND is used in IVS. */
6107 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6109 return ivs
->n_cand_uses
[cand
->id
] > 0;
6112 /* Returns number of induction variable candidates in the set IVS. */
6115 iv_ca_n_cands (struct iv_ca
*ivs
)
6117 return ivs
->n_cands
;
6120 /* Free the list of changes DELTA. */
6123 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6125 struct iv_ca_delta
*act
, *next
;
6127 for (act
= *delta
; act
; act
= next
)
6129 next
= act
->next_change
;
6136 /* Allocates new iv candidates assignment. */
6138 static struct iv_ca
*
6139 iv_ca_new (struct ivopts_data
*data
)
6141 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6145 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
6146 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
6147 nw
->cands
= BITMAP_ALLOC (NULL
);
6150 nw
->cand_use_cost
= no_cost
;
6152 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
6154 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
6155 nw
->num_used_inv_expr
= 0;
6160 /* Free memory occupied by the set IVS. */
6163 iv_ca_free (struct iv_ca
**ivs
)
6165 free ((*ivs
)->cand_for_use
);
6166 free ((*ivs
)->n_cand_uses
);
6167 BITMAP_FREE ((*ivs
)->cands
);
6168 free ((*ivs
)->n_invariant_uses
);
6169 free ((*ivs
)->used_inv_expr
);
6174 /* Dumps IVS to FILE. */
6177 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6179 const char *pref
= " invariants ";
6181 comp_cost cost
= iv_ca_cost (ivs
);
6183 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
6184 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6185 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
6186 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6188 for (i
= 0; i
< ivs
->upto
; i
++)
6190 struct iv_use
*use
= iv_use (data
, i
);
6191 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6193 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6194 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6196 fprintf (file
, " use:%d --> ??\n", use
->id
);
6199 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6200 if (ivs
->n_invariant_uses
[i
])
6202 fprintf (file
, "%s%d", pref
, i
);
6205 fprintf (file
, "\n\n");
6208 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6209 new set, and store differences in DELTA. Number of induction variables
6210 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6211 the function will try to find a solution with mimimal iv candidates. */
6214 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6215 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6216 unsigned *n_ivs
, bool min_ncand
)
6221 struct cost_pair
*old_cp
, *new_cp
;
6224 for (i
= 0; i
< ivs
->upto
; i
++)
6226 use
= iv_use (data
, i
);
6227 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6230 && old_cp
->cand
== cand
)
6233 new_cp
= get_use_iv_cost (data
, use
, cand
);
6237 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6240 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6243 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6246 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6247 cost
= iv_ca_cost (ivs
);
6249 *n_ivs
= iv_ca_n_cands (ivs
);
6250 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6255 /* Try narrowing set IVS by removing CAND. Return the cost of
6256 the new set and store the differences in DELTA. START is
6257 the candidate with which we start narrowing. */
6260 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6261 struct iv_cand
*cand
, struct iv_cand
*start
,
6262 struct iv_ca_delta
**delta
)
6266 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6268 struct iv_cand
*cnd
;
6269 comp_cost cost
, best_cost
, acost
;
6272 for (i
= 0; i
< n_iv_uses (data
); i
++)
6274 use
= iv_use (data
, i
);
6276 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6277 if (old_cp
->cand
!= cand
)
6280 best_cost
= iv_ca_cost (ivs
);
6281 /* Start narrowing with START. */
6282 new_cp
= get_use_iv_cost (data
, use
, start
);
6284 if (data
->consider_all_candidates
)
6286 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6288 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6291 cnd
= iv_cand (data
, ci
);
6293 cp
= get_use_iv_cost (data
, use
, cnd
);
6297 iv_ca_set_cp (data
, ivs
, use
, cp
);
6298 acost
= iv_ca_cost (ivs
);
6300 if (compare_costs (acost
, best_cost
) < 0)
6309 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6311 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6314 cnd
= iv_cand (data
, ci
);
6316 cp
= get_use_iv_cost (data
, use
, cnd
);
6320 iv_ca_set_cp (data
, ivs
, use
, cp
);
6321 acost
= iv_ca_cost (ivs
);
6323 if (compare_costs (acost
, best_cost
) < 0)
6330 /* Restore to old cp for use. */
6331 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6335 iv_ca_delta_free (delta
);
6336 return infinite_cost
;
6339 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6342 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6343 cost
= iv_ca_cost (ivs
);
6344 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6349 /* Try optimizing the set of candidates IVS by removing candidates different
6350 from to EXCEPT_CAND from it. Return cost of the new set, and store
6351 differences in DELTA. */
6354 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6355 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6358 struct iv_ca_delta
*act_delta
, *best_delta
;
6360 comp_cost best_cost
, acost
;
6361 struct iv_cand
*cand
;
6364 best_cost
= iv_ca_cost (ivs
);
6366 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6368 cand
= iv_cand (data
, i
);
6370 if (cand
== except_cand
)
6373 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6375 if (compare_costs (acost
, best_cost
) < 0)
6378 iv_ca_delta_free (&best_delta
);
6379 best_delta
= act_delta
;
6382 iv_ca_delta_free (&act_delta
);
6391 /* Recurse to possibly remove other unnecessary ivs. */
6392 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6393 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6394 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6395 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6399 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6400 cheaper local cost for USE than BEST_CP. Return pointer to
6401 the corresponding cost_pair, otherwise just return BEST_CP. */
6403 static struct cost_pair
*
6404 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6405 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6406 struct cost_pair
*best_cp
)
6408 struct iv_cand
*cand
;
6409 struct cost_pair
*cp
;
6411 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6412 if (cand_idx
== old_cand
->id
)
6415 cand
= iv_cand (data
, cand_idx
);
6416 cp
= get_use_iv_cost (data
, use
, cand
);
6417 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6423 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6424 which are used by more than one iv uses. For each of those candidates,
6425 this function tries to represent iv uses under that candidate using
6426 other ones with lower local cost, then tries to prune the new set.
6427 If the new set has lower cost, It returns the new cost after recording
6428 candidate replacement in list DELTA. */
6431 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6432 struct iv_ca_delta
**delta
)
6434 bitmap_iterator bi
, bj
;
6435 unsigned int i
, j
, k
;
6437 struct iv_cand
*cand
;
6438 comp_cost orig_cost
, acost
;
6439 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6440 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6443 orig_cost
= iv_ca_cost (ivs
);
6445 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6447 if (ivs
->n_cand_uses
[i
] == 1
6448 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6451 cand
= iv_cand (data
, i
);
6454 /* Represent uses under current candidate using other ones with
6455 lower local cost. */
6456 for (j
= 0; j
< ivs
->upto
; j
++)
6458 use
= iv_use (data
, j
);
6459 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6461 if (old_cp
->cand
!= cand
)
6465 if (data
->consider_all_candidates
)
6466 for (k
= 0; k
< n_iv_cands (data
); k
++)
6467 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6468 old_cp
->cand
, best_cp
);
6470 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6471 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6472 old_cp
->cand
, best_cp
);
6474 if (best_cp
== old_cp
)
6477 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6479 /* No need for further prune. */
6483 /* Prune the new candidate set. */
6484 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6485 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6486 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6487 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6489 if (compare_costs (acost
, orig_cost
) < 0)
6495 iv_ca_delta_free (&act_delta
);
6501 /* Tries to extend the sets IVS in the best possible way in order
6502 to express the USE. If ORIGINALP is true, prefer candidates from
6503 the original set of IVs, otherwise favor important candidates not
6504 based on any memory object. */
6507 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6508 struct iv_use
*use
, bool originalp
)
6510 comp_cost best_cost
, act_cost
;
6513 struct iv_cand
*cand
;
6514 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6515 struct cost_pair
*cp
;
6517 iv_ca_add_use (data
, ivs
, use
);
6518 best_cost
= iv_ca_cost (ivs
);
6519 cp
= iv_ca_cand_for_use (ivs
, use
);
6522 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6523 iv_ca_set_no_cp (data
, ivs
, use
);
6526 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6527 first try important candidates not based on any memory object. Only if
6528 this fails, try the specific ones. Rationale -- in loops with many
6529 variables the best choice often is to use just one generic biv. If we
6530 added here many ivs specific to the uses, the optimization algorithm later
6531 would be likely to get stuck in a local minimum, thus causing us to create
6532 too many ivs. The approach from few ivs to more seems more likely to be
6533 successful -- starting from few ivs, replacing an expensive use by a
6534 specific iv should always be a win. */
6535 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6537 cand
= iv_cand (data
, i
);
6539 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6542 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6545 if (iv_ca_cand_used_p (ivs
, cand
))
6548 cp
= get_use_iv_cost (data
, use
, cand
);
6552 iv_ca_set_cp (data
, ivs
, use
, cp
);
6553 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6555 iv_ca_set_no_cp (data
, ivs
, use
);
6556 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6558 if (compare_costs (act_cost
, best_cost
) < 0)
6560 best_cost
= act_cost
;
6562 iv_ca_delta_free (&best_delta
);
6563 best_delta
= act_delta
;
6566 iv_ca_delta_free (&act_delta
);
6569 if (infinite_cost_p (best_cost
))
6571 for (i
= 0; i
< use
->n_map_members
; i
++)
6573 cp
= use
->cost_map
+ i
;
6578 /* Already tried this. */
6579 if (cand
->important
)
6581 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6583 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6587 if (iv_ca_cand_used_p (ivs
, cand
))
6591 iv_ca_set_cp (data
, ivs
, use
, cp
);
6592 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6593 iv_ca_set_no_cp (data
, ivs
, use
);
6594 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6597 if (compare_costs (act_cost
, best_cost
) < 0)
6599 best_cost
= act_cost
;
6602 iv_ca_delta_free (&best_delta
);
6603 best_delta
= act_delta
;
6606 iv_ca_delta_free (&act_delta
);
6610 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6611 iv_ca_delta_free (&best_delta
);
6613 return !infinite_cost_p (best_cost
);
6616 /* Finds an initial assignment of candidates to uses. */
6618 static struct iv_ca
*
6619 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6621 struct iv_ca
*ivs
= iv_ca_new (data
);
6624 for (i
= 0; i
< n_iv_uses (data
); i
++)
6625 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6634 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6635 points to a bool variable, this function tries to break local
6636 optimal fixed-point by replacing candidates in IVS if it's true. */
6639 try_improve_iv_set (struct ivopts_data
*data
,
6640 struct iv_ca
*ivs
, bool *try_replace_p
)
6643 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6644 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6645 struct iv_cand
*cand
;
6647 /* Try extending the set of induction variables by one. */
6648 for (i
= 0; i
< n_iv_cands (data
); i
++)
6650 cand
= iv_cand (data
, i
);
6652 if (iv_ca_cand_used_p (ivs
, cand
))
6655 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6659 /* If we successfully added the candidate and the set is small enough,
6660 try optimizing it by removing other candidates. */
6661 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6663 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6664 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6665 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6666 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6669 if (compare_costs (acost
, best_cost
) < 0)
6672 iv_ca_delta_free (&best_delta
);
6673 best_delta
= act_delta
;
6676 iv_ca_delta_free (&act_delta
);
6681 /* Try removing the candidates from the set instead. */
6682 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6684 if (!best_delta
&& *try_replace_p
)
6686 *try_replace_p
= false;
6687 /* So far candidate selecting algorithm tends to choose fewer IVs
6688 so that it can handle cases in which loops have many variables
6689 but the best choice is often to use only one general biv. One
6690 weakness is it can't handle opposite cases, in which different
6691 candidates should be chosen with respect to each use. To solve
6692 the problem, we replace candidates in a manner described by the
6693 comments of iv_ca_replace, thus give general algorithm a chance
6694 to break local optimal fixed-point in these cases. */
6695 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6702 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6703 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6704 iv_ca_delta_free (&best_delta
);
6708 /* Attempts to find the optimal set of induction variables. We do simple
6709 greedy heuristic -- we try to replace at most one candidate in the selected
6710 solution and remove the unused ivs while this improves the cost. */
6712 static struct iv_ca
*
6713 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6716 bool try_replace_p
= true;
6718 /* Get the initial solution. */
6719 set
= get_initial_solution (data
, originalp
);
6722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6723 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6727 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6729 fprintf (dump_file
, "Initial set of candidates:\n");
6730 iv_ca_dump (data
, dump_file
, set
);
6733 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6735 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6737 fprintf (dump_file
, "Improved to:\n");
6738 iv_ca_dump (data
, dump_file
, set
);
6745 static struct iv_ca
*
6746 find_optimal_iv_set (struct ivopts_data
*data
)
6749 struct iv_ca
*set
, *origset
;
6751 comp_cost cost
, origcost
;
6753 /* Determine the cost based on a strategy that starts with original IVs,
6754 and try again using a strategy that prefers candidates not based
6756 origset
= find_optimal_iv_set_1 (data
, true);
6757 set
= find_optimal_iv_set_1 (data
, false);
6759 if (!origset
&& !set
)
6762 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6763 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6765 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6767 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6768 origcost
.cost
, origcost
.complexity
);
6769 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6770 cost
.cost
, cost
.complexity
);
6773 /* Choose the one with the best cost. */
6774 if (compare_costs (origcost
, cost
) <= 0)
6781 iv_ca_free (&origset
);
6783 for (i
= 0; i
< n_iv_uses (data
); i
++)
6785 use
= iv_use (data
, i
);
6786 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6792 /* Creates a new induction variable corresponding to CAND. */
6795 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6797 gimple_stmt_iterator incr_pos
;
6807 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6811 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6819 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6823 /* Mark that the iv is preserved. */
6824 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6825 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6827 /* Rewrite the increment so that it uses var_before directly. */
6828 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6832 gimple_add_tmp_var (cand
->var_before
);
6834 base
= unshare_expr (cand
->iv
->base
);
6836 create_iv (base
, unshare_expr (cand
->iv
->step
),
6837 cand
->var_before
, data
->current_loop
,
6838 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6841 /* Creates new induction variables described in SET. */
6844 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6847 struct iv_cand
*cand
;
6850 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6852 cand
= iv_cand (data
, i
);
6853 create_new_iv (data
, cand
);
6856 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6858 fprintf (dump_file
, "Selected IV set for loop %d",
6859 data
->current_loop
->num
);
6860 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6861 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6862 LOCATION_LINE (data
->loop_loc
));
6863 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6864 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6866 cand
= iv_cand (data
, i
);
6867 dump_cand (dump_file
, cand
);
6869 fprintf (dump_file
, "\n");
6873 /* Rewrites USE (definition of iv used in a nonlinear expression)
6874 using candidate CAND. */
6877 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6878 struct iv_use
*use
, struct iv_cand
*cand
)
6883 gimple_stmt_iterator bsi
;
6885 /* An important special case -- if we are asked to express value of
6886 the original iv by itself, just exit; there is no need to
6887 introduce a new computation (that might also need casting the
6888 variable to unsigned and back). */
6889 if (cand
->pos
== IP_ORIGINAL
6890 && cand
->incremented_at
== use
->stmt
)
6892 enum tree_code stmt_code
;
6894 gcc_assert (is_gimple_assign (use
->stmt
));
6895 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6897 /* Check whether we may leave the computation unchanged.
6898 This is the case only if it does not rely on other
6899 computations in the loop -- otherwise, the computation
6900 we rely upon may be removed in remove_unused_ivs,
6901 thus leading to ICE. */
6902 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6903 if (stmt_code
== PLUS_EXPR
6904 || stmt_code
== MINUS_EXPR
6905 || stmt_code
== POINTER_PLUS_EXPR
)
6907 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6908 op
= gimple_assign_rhs2 (use
->stmt
);
6909 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6910 op
= gimple_assign_rhs1 (use
->stmt
);
6917 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6921 comp
= get_computation (data
->current_loop
, use
, cand
);
6922 gcc_assert (comp
!= NULL_TREE
);
6924 switch (gimple_code (use
->stmt
))
6927 tgt
= PHI_RESULT (use
->stmt
);
6929 /* If we should keep the biv, do not replace it. */
6930 if (name_info (data
, tgt
)->preserve_biv
)
6933 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6937 tgt
= gimple_assign_lhs (use
->stmt
);
6938 bsi
= gsi_for_stmt (use
->stmt
);
6945 if (!valid_gimple_rhs_p (comp
)
6946 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6947 /* We can't allow re-allocating the stmt as it might be pointed
6949 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6950 >= gimple_num_ops (gsi_stmt (bsi
)))))
6952 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6953 true, GSI_SAME_STMT
);
6954 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6956 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6957 /* As this isn't a plain copy we have to reset alignment
6959 if (SSA_NAME_PTR_INFO (comp
))
6960 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6964 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6966 ass
= gimple_build_assign (tgt
, comp
);
6967 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6969 bsi
= gsi_for_stmt (use
->stmt
);
6970 remove_phi_node (&bsi
, false);
6974 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6975 use
->stmt
= gsi_stmt (bsi
);
6979 /* Performs a peephole optimization to reorder the iv update statement with
6980 a mem ref to enable instruction combining in later phases. The mem ref uses
6981 the iv value before the update, so the reordering transformation requires
6982 adjustment of the offset. CAND is the selected IV_CAND.
6986 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6994 directly propagating t over to (1) will introduce overlapping live range
6995 thus increase register pressure. This peephole transform it into:
6999 t = MEM_REF (base, iv2, 8, 8);
7006 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7009 gimple
*iv_update
, *stmt
;
7011 gimple_stmt_iterator gsi
, gsi_iv
;
7013 if (cand
->pos
!= IP_NORMAL
)
7016 var_after
= cand
->var_after
;
7017 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7019 bb
= gimple_bb (iv_update
);
7020 gsi
= gsi_last_nondebug_bb (bb
);
7021 stmt
= gsi_stmt (gsi
);
7023 /* Only handle conditional statement for now. */
7024 if (gimple_code (stmt
) != GIMPLE_COND
)
7027 gsi_prev_nondebug (&gsi
);
7028 stmt
= gsi_stmt (gsi
);
7029 if (stmt
!= iv_update
)
7032 gsi_prev_nondebug (&gsi
);
7033 if (gsi_end_p (gsi
))
7036 stmt
= gsi_stmt (gsi
);
7037 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7040 if (stmt
!= use
->stmt
)
7043 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7048 fprintf (dump_file
, "Reordering \n");
7049 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
7050 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
7051 fprintf (dump_file
, "\n");
7054 gsi
= gsi_for_stmt (use
->stmt
);
7055 gsi_iv
= gsi_for_stmt (iv_update
);
7056 gsi_move_before (&gsi_iv
, &gsi
);
7058 cand
->pos
= IP_BEFORE_USE
;
7059 cand
->incremented_at
= use
->stmt
;
7062 /* Rewrites USE (address that is an iv) using candidate CAND. */
7065 rewrite_use_address_1 (struct ivopts_data
*data
,
7066 struct iv_use
*use
, struct iv_cand
*cand
)
7069 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7070 tree base_hint
= NULL_TREE
;
7074 adjust_iv_update_pos (cand
, use
);
7075 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
7077 unshare_aff_combination (&aff
);
7079 /* To avoid undefined overflow problems, all IV candidates use unsigned
7080 integer types. The drawback is that this makes it impossible for
7081 create_mem_ref to distinguish an IV that is based on a memory object
7082 from one that represents simply an offset.
7084 To work around this problem, we pass a hint to create_mem_ref that
7085 indicates which variable (if any) in aff is an IV based on a memory
7086 object. Note that we only consider the candidate. If this is not
7087 based on an object, the base of the reference is in some subexpression
7088 of the use -- but these will use pointer types, so they are recognized
7089 by the create_mem_ref heuristics anyway. */
7090 if (cand
->iv
->base_object
)
7091 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7093 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7094 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
7095 reference_alias_ptr_type (*use
->op_p
),
7096 iv
, base_hint
, data
->speed
);
7097 copy_ref_info (ref
, *use
->op_p
);
7101 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7102 first use of a group, rewrites sub uses in the group too. */
7105 rewrite_use_address (struct ivopts_data
*data
,
7106 struct iv_use
*use
, struct iv_cand
*cand
)
7108 struct iv_use
*next
;
7110 gcc_assert (use
->sub_id
== 0);
7111 rewrite_use_address_1 (data
, use
, cand
);
7112 update_stmt (use
->stmt
);
7114 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
7116 rewrite_use_address_1 (data
, next
, cand
);
7117 update_stmt (next
->stmt
);
7123 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7127 rewrite_use_compare (struct ivopts_data
*data
,
7128 struct iv_use
*use
, struct iv_cand
*cand
)
7130 tree comp
, *var_p
, op
, bound
;
7131 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7132 enum tree_code compare
;
7133 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
7139 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7140 tree var_type
= TREE_TYPE (var
);
7143 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7145 fprintf (dump_file
, "Replacing exit test: ");
7146 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7149 bound
= unshare_expr (fold_convert (var_type
, bound
));
7150 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7152 gsi_insert_seq_on_edge_immediate (
7153 loop_preheader_edge (data
->current_loop
),
7156 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7157 gimple_cond_set_lhs (cond_stmt
, var
);
7158 gimple_cond_set_code (cond_stmt
, compare
);
7159 gimple_cond_set_rhs (cond_stmt
, op
);
7163 /* The induction variable elimination failed; just express the original
7165 comp
= get_computation (data
->current_loop
, use
, cand
);
7166 gcc_assert (comp
!= NULL_TREE
);
7168 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7171 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7172 true, GSI_SAME_STMT
);
7175 /* Rewrites USE using candidate CAND. */
7178 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
7182 case USE_NONLINEAR_EXPR
:
7183 rewrite_use_nonlinear_expr (data
, use
, cand
);
7187 rewrite_use_address (data
, use
, cand
);
7191 rewrite_use_compare (data
, use
, cand
);
7198 update_stmt (use
->stmt
);
7201 /* Rewrite the uses using the selected induction variables. */
7204 rewrite_uses (struct ivopts_data
*data
)
7207 struct iv_cand
*cand
;
7210 for (i
= 0; i
< n_iv_uses (data
); i
++)
7212 use
= iv_use (data
, i
);
7213 cand
= use
->selected
;
7216 rewrite_use (data
, use
, cand
);
7220 /* Removes the ivs that are not used after rewriting. */
7223 remove_unused_ivs (struct ivopts_data
*data
)
7227 bitmap toremove
= BITMAP_ALLOC (NULL
);
7229 /* Figure out an order in which to release SSA DEFs so that we don't
7230 release something that we'd have to propagate into a debug stmt
7232 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7234 struct version_info
*info
;
7236 info
= ver_info (data
, j
);
7238 && !integer_zerop (info
->iv
->step
)
7240 && !info
->iv
->have_use_for
7241 && !info
->preserve_biv
)
7243 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7245 tree def
= info
->iv
->ssa_name
;
7247 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7249 imm_use_iterator imm_iter
;
7250 use_operand_p use_p
;
7254 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7256 if (!gimple_debug_bind_p (stmt
))
7259 /* We just want to determine whether to do nothing
7260 (count == 0), to substitute the computed
7261 expression into a single use of the SSA DEF by
7262 itself (count == 1), or to use a debug temp
7263 because the SSA DEF is used multiple times or as
7264 part of a larger expression (count > 1). */
7266 if (gimple_debug_bind_get_value (stmt
) != def
)
7270 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7276 struct iv_use dummy_use
;
7277 struct iv_cand
*best_cand
= NULL
, *cand
;
7278 unsigned i
, best_pref
= 0, cand_pref
;
7280 memset (&dummy_use
, 0, sizeof (dummy_use
));
7281 dummy_use
.iv
= info
->iv
;
7282 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7284 cand
= iv_use (data
, i
)->selected
;
7285 if (cand
== best_cand
)
7287 cand_pref
= operand_equal_p (cand
->iv
->step
,
7291 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7292 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7295 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7297 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7300 best_pref
= cand_pref
;
7307 tree comp
= get_computation_at (data
->current_loop
,
7308 &dummy_use
, best_cand
,
7309 SSA_NAME_DEF_STMT (def
));
7315 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7316 DECL_ARTIFICIAL (vexpr
) = 1;
7317 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7318 if (SSA_NAME_VAR (def
))
7319 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7321 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7323 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7324 gimple_stmt_iterator gsi
;
7326 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7327 gsi
= gsi_after_labels (gimple_bb
7328 (SSA_NAME_DEF_STMT (def
)));
7330 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7332 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7336 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7338 if (!gimple_debug_bind_p (stmt
))
7341 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7342 SET_USE (use_p
, comp
);
7350 release_defs_bitset (toremove
);
7352 BITMAP_FREE (toremove
);
7355 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7356 for hash_map::traverse. */
7359 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7365 /* Frees data allocated by the optimization of a single loop. */
7368 free_loop_data (struct ivopts_data
*data
)
7376 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7377 delete data
->niters
;
7378 data
->niters
= NULL
;
7381 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7383 struct version_info
*info
;
7385 info
= ver_info (data
, i
);
7387 info
->has_nonlin_use
= false;
7388 info
->preserve_biv
= false;
7391 bitmap_clear (data
->relevant
);
7392 bitmap_clear (data
->important_candidates
);
7394 for (i
= 0; i
< n_iv_uses (data
); i
++)
7396 struct iv_use
*use
= iv_use (data
, i
);
7397 struct iv_use
*pre
= use
, *sub
= use
->next
;
7401 gcc_assert (sub
->related_cands
== NULL
);
7402 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7409 BITMAP_FREE (use
->related_cands
);
7410 for (j
= 0; j
< use
->n_map_members
; j
++)
7411 if (use
->cost_map
[j
].depends_on
)
7412 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7413 free (use
->cost_map
);
7416 data
->iv_uses
.truncate (0);
7418 for (i
= 0; i
< n_iv_cands (data
); i
++)
7420 struct iv_cand
*cand
= iv_cand (data
, i
);
7422 if (cand
->depends_on
)
7423 BITMAP_FREE (cand
->depends_on
);
7426 data
->iv_candidates
.truncate (0);
7428 if (data
->version_info_size
< num_ssa_names
)
7430 data
->version_info_size
= 2 * num_ssa_names
;
7431 free (data
->version_info
);
7432 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7435 data
->max_inv_id
= 0;
7437 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7438 SET_DECL_RTL (obj
, NULL_RTX
);
7440 decl_rtl_to_reset
.truncate (0);
7442 data
->inv_expr_tab
->empty ();
7443 data
->inv_expr_id
= 0;
7446 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7450 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7452 free_loop_data (data
);
7453 free (data
->version_info
);
7454 BITMAP_FREE (data
->relevant
);
7455 BITMAP_FREE (data
->important_candidates
);
7457 decl_rtl_to_reset
.release ();
7458 data
->iv_uses
.release ();
7459 data
->iv_candidates
.release ();
7460 delete data
->inv_expr_tab
;
7461 data
->inv_expr_tab
= NULL
;
7462 free_affine_expand_cache (&data
->name_expansion_cache
);
7463 obstack_free (&data
->iv_obstack
, NULL
);
7466 /* Returns true if the loop body BODY includes any function calls. */
7469 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7471 gimple_stmt_iterator gsi
;
7474 for (i
= 0; i
< num_nodes
; i
++)
7475 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7477 gimple
*stmt
= gsi_stmt (gsi
);
7478 if (is_gimple_call (stmt
)
7479 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7485 /* Optimizes the LOOP. Returns true if anything changed. */
7488 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7490 bool changed
= false;
7491 struct iv_ca
*iv_ca
;
7492 edge exit
= single_dom_exit (loop
);
7495 gcc_assert (!data
->niters
);
7496 data
->current_loop
= loop
;
7497 data
->loop_loc
= find_loop_location (loop
);
7498 data
->speed
= optimize_loop_for_speed_p (loop
);
7500 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7502 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7503 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7504 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7505 LOCATION_LINE (data
->loop_loc
));
7506 fprintf (dump_file
, "\n");
7510 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7511 exit
->src
->index
, exit
->dest
->index
);
7512 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7513 fprintf (dump_file
, "\n");
7516 fprintf (dump_file
, "\n");
7519 body
= get_loop_body (loop
);
7520 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7521 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7524 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7526 /* For each ssa name determines whether it behaves as an induction variable
7528 if (!find_induction_variables (data
))
7531 /* Finds interesting uses (item 1). */
7532 find_interesting_uses (data
);
7533 group_address_uses (data
);
7534 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7537 /* Finds candidates for the induction variables (item 2). */
7538 find_iv_candidates (data
);
7540 /* Calculates the costs (item 3, part 1). */
7541 determine_iv_costs (data
);
7542 determine_use_iv_costs (data
);
7543 determine_set_costs (data
);
7545 /* Find the optimal set of induction variables (item 3, part 2). */
7546 iv_ca
= find_optimal_iv_set (data
);
7551 /* Create the new induction variables (item 4, part 1). */
7552 create_new_ivs (data
, iv_ca
);
7553 iv_ca_free (&iv_ca
);
7555 /* Rewrite the uses (item 4, part 2). */
7556 rewrite_uses (data
);
7558 /* Remove the ivs that are unused after rewriting. */
7559 remove_unused_ivs (data
);
7561 /* We have changed the structure of induction variables; it might happen
7562 that definitions in the scev database refer to some of them that were
7567 free_loop_data (data
);
7572 /* Main entry point. Optimizes induction variables in loops. */
7575 tree_ssa_iv_optimize (void)
7578 struct ivopts_data data
;
7580 tree_ssa_iv_optimize_init (&data
);
7582 /* Optimize the loops starting with the innermost ones. */
7583 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7585 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7586 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7588 tree_ssa_iv_optimize_loop (&data
, loop
);
7591 tree_ssa_iv_optimize_finalize (&data
);